79 changes: 49 additions & 30 deletions src/dstack/_internal/cli/utils/run.py
@@ -5,16 +5,19 @@
from rich.table import Table

from dstack._internal.cli.utils.common import NO_OFFERS_WARNING, add_row_from_dict, console
from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
from dstack._internal.core.models.instances import InstanceAvailability, Resources
from dstack._internal.core.models.instances import (
InstanceAvailability,
InstanceOfferWithAvailability,
InstanceType,
)
from dstack._internal.core.models.profiles import (
DEFAULT_RUN_TERMINATION_IDLE_TIME,
TerminationPolicy,
)
from dstack._internal.core.models.runs import (
Job,
JobProvisioningData,
JobRuntimeData,
JobStatus,
JobSubmission,
Probe,
@@ -294,27 +297,24 @@ def _format_price(price: float, is_spot: bool) -> str:
return price_str


def _format_backend(backend: Any, region: str) -> str:
backend_str = getattr(backend, "value", backend)
backend_str = backend_str.replace("remote", "ssh")
def _format_backend(backend_type: BackendType, region: str) -> str:
backend_str = backend_type.value
if backend_type == BackendType.REMOTE:
backend_str = "ssh"
return f"{backend_str} ({region})"


def _format_instance_type(jpd: JobProvisioningData, jrd: Optional[JobRuntimeData]) -> str:
instance_type = jpd.instance_type.name
if jrd is not None and getattr(jrd, "offer", None) is not None:
if jrd.offer.total_blocks > 1:
instance_type += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})"
if jpd.reservation:
instance_type += f" ({jpd.reservation})"
return instance_type


def _get_resources(jpd: JobProvisioningData, jrd: Optional[JobRuntimeData]) -> Resources:
resources: Resources = jpd.instance_type.resources
if jrd is not None and getattr(jrd, "offer", None) is not None:
resources = jrd.offer.instance.resources
return resources
def _format_instance_type(
instance_type: InstanceType,
shared_offer: Optional[InstanceOfferWithAvailability],
reservation: Optional[str],
) -> str:
instance_type_str = instance_type.name
if shared_offer is not None:
instance_type_str += f" ({shared_offer.blocks}/{shared_offer.total_blocks})"
if reservation is not None:
instance_type_str += f" ({reservation})"
return instance_type_str
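
A minimal, self-contained sketch of what the new signature renders, using plain dataclass stand-ins for dstack's models (the stub names and all values are made up for illustration; only the fields the formatter reads are modeled):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class StubInstanceType:  # stand-in for InstanceType; only .name is read
    name: str

@dataclass
class StubSharedOffer:  # stand-in for InstanceOfferWithAvailability
    blocks: int  # blocks allocated to this job
    total_blocks: int  # blocks the whole instance is split into

def format_instance_type(
    instance_type: StubInstanceType,
    shared_offer: Optional[StubSharedOffer],
    reservation: Optional[str],
) -> str:
    s = instance_type.name
    if shared_offer is not None:
        s += f" ({shared_offer.blocks}/{shared_offer.total_blocks})"
    if reservation is not None:
        s += f" ({reservation})"
    return s

# A job occupying 1 of 4 blocks on a shared node:
assert format_instance_type(StubInstanceType("gpu-node"), StubSharedOffer(1, 4), None) == "gpu-node (1/4)"
# A whole instance with a capacity reservation (hypothetical id):
assert format_instance_type(StubInstanceType("p4d.24xlarge"), None, "res-0abc") == "p4d.24xlarge (res-0abc)"
```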


def _format_run_name(run: CoreRun, show_deployment_num: bool) -> str:
@@ -387,16 +387,35 @@ def get_runs_table(
}
jpd = latest_job_submission.job_provisioning_data
if jpd is not None:
shared_offer: Optional[InstanceOfferWithAvailability] = None
instance_type = jpd.instance_type
price = jpd.price
jrd = latest_job_submission.job_runtime_data
resources = _get_resources(jpd, jrd)
update_dict: Dict[Union[str, int], Any] = {
"BACKEND": _format_backend(jpd.backend, jpd.region),
"RESOURCES": resources.pretty_format(include_spot=False),
"GPU": resources.pretty_format(gpu_only=True, include_spot=False),
"INSTANCE TYPE": _format_instance_type(jpd, jrd),
"PRICE": _format_price(jpd.price, resources.spot),
}
job_row.update(update_dict)
if jrd is not None and jrd.offer is not None and jrd.offer.total_blocks > 1:
# We only use offer data from jrd if the job is/was running on a shared
# instance (the instance blocks feature). In that case, jpd contains the full
# instance offer data, while jrd contains the shared offer (a fraction of
# the full offer). Although jrd always contains the offer, we don't use it in
# other cases because, unlike jpd offer data, the jrd offer is not updated by
# the Compute.update_provisioning_data() call, and some backends, namely
# Kubernetes, may update offer data via that method.
# As long as we don't have a backend that both supports the blocks feature
# and may update offer data in update_provisioning_data(), this logic is fine.
shared_offer = jrd.offer
instance_type = shared_offer.instance
price = shared_offer.price
resources = instance_type.resources
job_row.update(
{
"BACKEND": _format_backend(jpd.backend, jpd.region),
"RESOURCES": resources.pretty_format(include_spot=False),
"GPU": resources.pretty_format(gpu_only=True, include_spot=False),
"INSTANCE TYPE": _format_instance_type(
instance_type, shared_offer, jpd.reservation
),
"PRICE": _format_price(price, resources.spot),
}
)
if merge_job_rows:
_status = job_row["STATUS"]
_resources = job_row["RESOURCES"]
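
The inline comment above describes when the table uses the jrd offer instead of the jpd one. A condensed sketch of that selection rule, with plain dicts standing in for JobProvisioningData/JobRuntimeData (values made up):

```python
jpd = {"instance_type": "gpu-node", "price": 4.00}  # full instance offer; kept current
jrd_offer = {"instance": "gpu-node", "price": 1.00, "total_blocks": 4}  # fractional offer

if jrd_offer is not None and jrd_offer["total_blocks"] > 1:
    # Shared instance (blocks feature): show the fraction the job occupies.
    instance, price = jrd_offer["instance"], jrd_offer["price"]
else:
    # Whole instance: prefer jpd, which some backends (e.g., Kubernetes)
    # refresh via Compute.update_provisioning_data(); jrd would go stale.
    instance, price = jpd["instance_type"], jpd["price"]
```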
186 changes: 117 additions & 69 deletions src/dstack/_internal/core/backends/kubernetes/compute.py
@@ -50,7 +50,7 @@
Resources,
SSHConnectionParams,
)
from dstack._internal.core.models.resources import CPUSpec, Memory
from dstack._internal.core.models.resources import CPUSpec, GPUSpec, Memory
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
from dstack._internal.core.models.volumes import Volume
from dstack._internal.utils.common import parse_memory
@@ -123,38 +123,10 @@ def get_offers_by_requirements(
)
nodes = get_value(node_list, ".items", list[client.V1Node], required=True)
for node in nodes:
try:
name = get_value(node, ".metadata.name", str, required=True)
cpu_arch = normalize_arch(
get_value(node, ".status.node_info.architecture", str)
).to_cpu_architecture()
allocatable = get_value(node, ".status.allocatable", dict[str, str], required=True)
cpus = _parse_cpu(allocatable["cpu"])
memory_mib = _parse_memory(allocatable["memory"])
disk_size_mib = _parse_memory(allocatable["ephemeral-storage"])
gpus = _get_node_gpus(node)
except (AttributeError, KeyError, ValueError) as e:
logger.exception("Failed to process node: %s: %s", type(e).__name__, e)
continue
instance_offer = InstanceOfferWithAvailability(
backend=BackendType.KUBERNETES,
instance=InstanceType(
name=name,
resources=Resources(
cpus=cpus,
cpu_arch=cpu_arch,
memory_mib=memory_mib,
gpus=gpus,
spot=False,
disk=Disk(size_mib=disk_size_mib),
),
),
price=0,
region=DUMMY_REGION,
availability=InstanceAvailability.AVAILABLE,
instance_runtime=InstanceRuntime.RUNNER,
)
instance_offers.extend(filter_offers_by_requirements([instance_offer], requirements))
if (instance_offer := _get_instance_offer_from_node(node)) is not None:
instance_offers.extend(
filter_offers_by_requirements([instance_offer], requirements)
)
return instance_offers

def run_job(
@@ -216,18 +188,17 @@ def run_job(
assert isinstance(resources_spec.cpu, CPUSpec)
if (cpu_min := resources_spec.cpu.count.min) is not None:
resources_requests["cpu"] = str(cpu_min)
if (cpu_max := resources_spec.cpu.count.max) is not None:
resources_limits["cpu"] = str(cpu_max)
if (gpu_spec := resources_spec.gpu) is not None:
gpu_min = gpu_spec.count.min
if gpu_min is not None and gpu_min > 0:
if not (offer_gpus := instance_offer.instance.resources.gpus):
raise ComputeError(
"GPU is requested but the offer has no GPUs:"
f" {gpu_spec=} {instance_offer=}",
)
gpu_resource, node_affinity, node_taint = _get_pod_spec_parameters_for_gpu(
self.api, offer_gpus[0]
self.api, gpu_spec
)
logger.debug("Requesting GPU resource: %s=%d", gpu_resource, gpu_min)
# Limit must be set (GPU resources cannot be overcommitted)
# and must be equal to the request.
resources_requests[gpu_resource] = resources_limits[gpu_resource] = str(gpu_min)
# It should be NoSchedule, but we also add NoExecute toleration just in case.
for effect in [TaintEffect.NO_SCHEDULE, TaintEffect.NO_EXECUTE]:
@@ -238,11 +209,13 @@
)
if (memory_min := resources_spec.memory.min) is not None:
resources_requests["memory"] = _render_memory(memory_min)
if (
resources_spec.disk is not None
and (disk_min := resources_spec.disk.size.min) is not None
):
resources_requests["ephemeral-storage"] = _render_memory(disk_min)
if (memory_max := resources_spec.memory.max) is not None:
resources_limits["memory"] = _render_memory(memory_max)
if (disk_spec := resources_spec.disk) is not None:
if (disk_min := disk_spec.size.min) is not None:
resources_requests["ephemeral-storage"] = _render_memory(disk_min)
if (disk_max := disk_spec.size.max) is not None:
resources_limits["ephemeral-storage"] = _render_memory(disk_max)
if (shm_size := resources_spec.shm_size) is not None:
shm_volume_name = "dev-shm"
volumes_.append(
@@ -328,8 +301,9 @@ def run_job(
instance_type=instance_offer.instance,
instance_id=instance_name,
# Although we can already get Service's ClusterIP from the `V1Service` object returned
# by the `create_namespaced_service` method, we still need PodIP for multinode runs.
# We'll update both hostname and internal_ip once the pod is assigned to the node.
# by the `create_namespaced_service` method, we still need 1) the updated
# instance offer and 2) the PodIP for multinode runs.
# We'll update all these fields once the pod is assigned to the node.
hostname=None,
internal_ip=None,
region=instance_offer.region,
@@ -368,6 +342,15 @@ def update_provisioning_data(
namespace=self.config.namespace,
)
provisioning_data.hostname = get_value(service, ".spec.cluster_ip", str, required=True)
node = call_api_method(
self.api.read_node,
client.V1Node,
name=get_value(pod, ".spec.node_name", str, required=True),
)
if (instance_offer := _get_instance_offer_from_node(node)) is not None:
provisioning_data.instance_type = instance_offer.instance
provisioning_data.region = instance_offer.region
provisioning_data.price = instance_offer.price

def terminate_instance(
self, instance_id: str, region: str, backend_data: Optional[str] = None
@@ -500,6 +483,40 @@ def terminate_gateway(
)


def _get_instance_offer_from_node(node: client.V1Node) -> Optional[InstanceOfferWithAvailability]:
try:
name = get_value(node, ".metadata.name", str, required=True)
cpu_arch = normalize_arch(
get_value(node, ".status.node_info.architecture", str)
).to_cpu_architecture()
allocatable = get_value(node, ".status.allocatable", dict[str, str], required=True)
cpus = _parse_cpu(allocatable["cpu"])
memory_mib = _parse_memory(allocatable["memory"])
disk_size_mib = _parse_memory(allocatable["ephemeral-storage"])
gpus = _get_node_gpus(node)
except (AttributeError, KeyError, ValueError) as e:
logger.exception("Failed to process node: %s: %s", type(e).__name__, e)
return None
return InstanceOfferWithAvailability(
backend=BackendType.KUBERNETES,
instance=InstanceType(
name=name,
resources=Resources(
cpus=cpus,
cpu_arch=cpu_arch,
memory_mib=memory_mib,
gpus=gpus,
spot=False,
disk=Disk(size_mib=disk_size_mib),
),
),
price=0,
region=DUMMY_REGION,
availability=InstanceAvailability.AVAILABLE,
instance_runtime=InstanceRuntime.RUNNER,
)
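
For intuition on the quantity formats this helper reads out of `.status.allocatable`, here is a standalone sketch of Kubernetes-style quantity parsing. It is not dstack's `_parse_cpu`/`_parse_memory` (whose bodies are elided in this diff); it assumes millicore CPU strings and binary memory suffixes, with bare integers treated as plain bytes:

```python
def parse_cpu_quantity(cpu: str) -> int:
    """Whole CPUs from a Kubernetes CPU quantity, e.g. "7900m" (millicores) -> 7."""
    if cpu.endswith("m"):
        return int(cpu[:-1]) // 1000
    return int(cpu)

_BINARY_SUFFIXES = {"Ki": 1 / 1024, "Mi": 1, "Gi": 1024, "Ti": 1024 * 1024}

def parse_memory_quantity_mib(quantity: str) -> int:
    """MiB from a binary-suffixed quantity, e.g. "64Gi" -> 65536."""
    for suffix, factor in _BINARY_SUFFIXES.items():
        if quantity.endswith(suffix):
            return int(float(quantity[: -len(suffix)]) * factor)
    return int(quantity) // (1024 * 1024)  # a bare integer is plain bytes

assert parse_cpu_quantity("7900m") == 7
assert parse_memory_quantity_mib("64Gi") == 65536
assert parse_memory_quantity_mib("32594404Ki") == 31830
```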


def _parse_cpu(cpu: str) -> int:
if cpu.endswith("m"):
# "m" means millicpu (1/1000 CPU), e.g., 7900m -> 7.9 -> 7
Expand Down Expand Up @@ -590,36 +607,39 @@ def _get_amd_gpu_from_node_labels(labels: dict[str, str]) -> Optional[Gpu]:


def _get_pod_spec_parameters_for_gpu(
api: client.CoreV1Api, gpu: Gpu
api: client.CoreV1Api, gpu_spec: GPUSpec
) -> tuple[str, client.V1NodeAffinity, str]:
gpu_vendor = gpu.vendor
assert gpu_vendor is not None
if gpu_vendor == AcceleratorVendor.NVIDIA:
node_affinity = _get_nvidia_gpu_node_affinity(api, gpu)
node_list = call_api_method(api.list_node, client.V1NodeList)
nodes = get_value(node_list, ".items", list[client.V1Node], required=True)
gpu_vendor = gpu_spec.vendor
# If no vendor is specified, we assume NVIDIA. Technically, it's possible to request
# either NVIDIA or AMD in the run configuration using only GPU names
# (e.g., `gpu: H100,MI300X:8`), but we ignore such configurations as it's hard to
# translate them to a K8s request.
if gpu_vendor is None or gpu_vendor == AcceleratorVendor.NVIDIA:
node_affinity = _get_nvidia_gpu_node_affinity(gpu_spec, nodes)
return NVIDIA_GPU_RESOURCE, node_affinity, NVIDIA_GPU_NODE_TAINT
if gpu_vendor == AcceleratorVendor.AMD:
node_affinity = _get_amd_gpu_node_affinity(gpu)
node_affinity = _get_amd_gpu_node_affinity(gpu_spec, nodes)
return AMD_GPU_RESOURCE, node_affinity, AMD_GPU_NODE_TAINT
raise ComputeError(f"Unsupported GPU vendor: {gpu_vendor}")


def _get_nvidia_gpu_node_affinity(api: client.CoreV1Api, gpu: Gpu) -> client.V1NodeAffinity:
def _get_nvidia_gpu_node_affinity(
gpu_spec: GPUSpec, nodes: list[client.V1Node]
) -> client.V1NodeAffinity:
matching_gpu_label_values: set[str] = set()
# We cannot generate an expected GPU label value from the Gpu model instance
# as the actual values may have additional components (socket, memory type, etc.)
# that we don't preserve in the Gpu model, e.g., "NVIDIA-H100-80GB-HBM3".
# Moreover, a single Gpu may match multiple label values.
# As a workaround, we iterate and process all node labels once again (we already
# processed them in `get_offers_by_requirements()`).
node_list = call_api_method(api.list_node, client.V1NodeList)
nodes = get_value(node_list, ".items", list[client.V1Node], required=True)
for node in nodes:
labels = get_value(node, ".metadata.labels", dict[str, str]) or {}
if _get_nvidia_gpu_from_node_labels(labels) == gpu:
gpu = _get_nvidia_gpu_from_node_labels(labels)
if gpu is not None and _gpu_matches_gpu_spec(gpu, gpu_spec):
matching_gpu_label_values.add(labels[NVIDIA_GPU_PRODUCT_LABEL])
if not matching_gpu_label_values:
raise ComputeError(f"NVIDIA GPU is requested but no matching GPU labels found: {gpu=}")
logger.debug("Selecting nodes by labels %s for NVIDIA %s", matching_gpu_label_values, gpu.name)
raise ComputeError(
f"NVIDIA GPU is requested but no matching GPU labels found: {gpu_spec=}"
)
logger.debug(
"Selecting nodes by labels %s for NVIDIA %s", matching_gpu_label_values, gpu_spec.name
)
return client.V1NodeAffinity(
required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
node_selector_terms=[
@@ -637,10 +657,15 @@ def _get_nvidia_gpu_node_affinity(api: client.CoreV1Api, gpu: Gpu) -> client.V1NodeAffinity:
)
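
Since the `V1NodeAffinity` construction is truncated in the hunk above, here is roughly the shape of the object these helpers return, built with the official `kubernetes` Python client. The label key mirrors NVIDIA GPU feature discovery's product label (assumed to be the value of `NVIDIA_GPU_PRODUCT_LABEL`), and the values list is illustrative:

```python
from kubernetes import client

affinity = client.V1NodeAffinity(
    required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
        node_selector_terms=[
            client.V1NodeSelectorTerm(
                match_expressions=[
                    client.V1NodeSelectorRequirement(
                        key="nvidia.com/gpu.product",  # assumed NVIDIA_GPU_PRODUCT_LABEL
                        operator="In",
                        values=["NVIDIA-H100-80GB-HBM3"],  # one entry per matching label value
                    )
                ]
            )
        ]
    )
)
```

A pod carrying this affinity (plus the vendor taint toleration) can only be scheduled onto nodes whose GPU product label matches one of the collected values.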


def _get_amd_gpu_node_affinity(gpu: Gpu) -> client.V1NodeAffinity:
device_ids = AMD_GPU_NAME_TO_DEVICE_IDS.get(gpu.name)
if device_ids is None:
raise ComputeError(f"AMD GPU is requested but no matching device ids found: {gpu=}")
def _get_amd_gpu_node_affinity(
gpu_spec: GPUSpec, nodes: list[client.V1Node]
) -> client.V1NodeAffinity:
matching_device_ids: set[int] = set()
for node in nodes:
labels = get_value(node, ".metadata.labels", dict[str, str]) or {}
gpu = _get_amd_gpu_from_node_labels(labels)
if gpu is not None and _gpu_matches_gpu_spec(gpu, gpu_spec):
matching_device_ids.update(AMD_GPU_NAME_TO_DEVICE_IDS[gpu.name])
return client.V1NodeAffinity(
required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
node_selector_terms=[
@@ -652,12 +677,35 @@ def _get_amd_gpu_node_affinity(gpu: Gpu) -> client.V1NodeAffinity:
),
],
)
for device_id in device_ids
for device_id in matching_device_ids
],
),
)


def _gpu_matches_gpu_spec(gpu: Gpu, gpu_spec: GPUSpec) -> bool:
if gpu_spec.vendor is not None and gpu.vendor != gpu_spec.vendor:
return False
if gpu_spec.name is not None and gpu.name.lower() not in map(str.lower, gpu_spec.name):
return False
if gpu_spec.memory is not None:
min_memory_gib = gpu_spec.memory.min
if min_memory_gib is not None and gpu.memory_mib < min_memory_gib * 1024:
return False
max_memory_gib = gpu_spec.memory.max
if max_memory_gib is not None and gpu.memory_mib > max_memory_gib * 1024:
return False
if gpu_spec.compute_capability is not None:
if gpu.vendor != AcceleratorVendor.NVIDIA:
return False
gpu_info = NVIDIA_GPU_NAME_TO_GPU_INFO.get(gpu.name)
if gpu_info is None:
return False
if gpu_info.compute_capability < gpu_spec.compute_capability:
return False
return True
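
Two details in the matcher above are worth calling out, shown here as runnable one-liners: name comparison is case-insensitive, and the memory bounds convert GiB (GPUSpec) to MiB (Gpu.memory_mib) before comparing. The compute-capability branch is NVIDIA-only: any non-NVIDIA GPU, or an NVIDIA GPU missing from NVIDIA_GPU_NAME_TO_GPU_INFO, is rejected.

```python
# Case-insensitive name matching, as in the function above:
assert "h100" in map(str.lower, ["H100", "H200"])

# GPUSpec memory bounds are GiB while Gpu.memory_mib is MiB, hence the *1024:
min_memory_gib, gpu_memory_mib = 80, 81920
assert gpu_memory_mib >= min_memory_gib * 1024  # 80 GiB == 81920 MiB -> within bounds
```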


def _continue_setup_jump_pod(
api: client.CoreV1Api,
namespace: str,