diff --git a/cli/dstack/_internal/core/job.py b/cli/dstack/_internal/core/job.py
index 5a594c8bf..4081542e1 100644
--- a/cli/dstack/_internal/core/job.py
+++ b/cli/dstack/_internal/core/job.py
@@ -54,6 +54,14 @@ def serialize(self) -> Dict[str, Any]:
         req_data["local"] = self.local
         return req_data
 
+    def pretty_format(self):
+        res = ""
+        res += f"{self.cpus}xCPUs"
+        res += f", {self.memory_mib}MB"
+        if self.gpus:
+            res += f", {len(self.gpus)}x{self.gpus[0].name}"
+        return res
+
 
 class JobRef(BaseModel):
     @abstractmethod
diff --git a/cli/dstack/_internal/core/profile.py b/cli/dstack/_internal/core/profile.py
index 78ca87692..377adf3c7 100644
--- a/cli/dstack/_internal/core/profile.py
+++ b/cli/dstack/_internal/core/profile.py
@@ -11,7 +11,7 @@
 DEFAULT_RETRY_LIMIT = 3600
 
 
-def mem_size(v: Optional[Union[int, str]]) -> Optional[int]:
+def parse_memory(v: Optional[Union[int, str]]) -> Optional[int]:
     """
     Converts human-readable sizes (MB and GB) to megabytes
-    >>> mem_size("512MB")
+    >>> parse_memory("512MB")
@@ -19,14 +19,13 @@
     512
-    >>> mem_size("1 GB")
+    >>> parse_memory("1 GB")
     1024
     """
-    dec_bin = 1000 / 1024
     if isinstance(v, str):
-        m = re.fullmatch(r"(\d+) *([gm]b)?", v.strip().lower())
+        m = re.fullmatch(r"(\d+) *([mg]b)?", v.strip().lower())
         if not m:
             raise ValueError(f"Invalid memory size: {v}")
-        v = int(m.group(1)) * (dec_bin**2)
+        v = int(m.group(1))
         if m.group(2) == "gb":
-            v = v * 1000
+            v = v * 1024
     return int(v)
 
@@ -52,15 +51,15 @@ class ProfileGPU(ForbidExtra):
     name: Optional[str]
     count: int = 1
     memory: Optional[Union[int, str]]
-    _validate_mem = validator("memory", pre=True, allow_reuse=True)(mem_size)
+    _validate_mem = validator("memory", pre=True, allow_reuse=True)(parse_memory)
 
 
 class ProfileResources(ForbidExtra):
     gpu: Optional[Union[int, ProfileGPU]]
-    memory: Union[int, str] = mem_size(DEFAULT_MEM)
+    memory: Union[int, str] = parse_memory(DEFAULT_MEM)
     shm_size: Optional[Union[int, str]]
     cpu: int = DEFAULT_CPU
-    _validate_mem = validator("memory", "shm_size", pre=True, allow_reuse=True)(mem_size)
+    _validate_mem = validator("memory", "shm_size", pre=True, allow_reuse=True)(parse_memory)
 
     @validator("gpu", pre=True)
     def _validate_gpu(cls, v: Optional[Union[int, ProfileGPU]]) -> Optional[ProfileGPU]:
diff --git a/cli/dstack/_internal/hub/routers/runs.py b/cli/dstack/_internal/hub/routers/runs.py
index 9492bd6b8..5e5e4b953 100644
--- a/cli/dstack/_internal/hub/routers/runs.py
+++ b/cli/dstack/_internal/hub/routers/runs.py
@@ -29,11 +29,12 @@ async def get_run_plan(
     for job in body.jobs:
         instance_type = await run_async(backend.predict_instance_type, job)
         if instance_type is None:
+            msg = f"No instance type matching requirements ({job.requirements.pretty_format()})."
+            if backend.name == "local":
+                msg += " Ensure that enough CPU and memory are available for Docker containers or lower the requirements."
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail=error_detail(
-                    msg=NoMatchingInstanceError.message, code=NoMatchingInstanceError.code
-                ),
+                detail=error_detail(msg=msg, code=NoMatchingInstanceError.code),
             )
         try:
             build = backend.predict_build_plan(job)
diff --git a/runner/internal/container/engine.go b/runner/internal/container/engine.go
index 7cdcce904..bad5446d5 100644
--- a/runner/internal/container/engine.go
+++ b/runner/internal/container/engine.go
@@ -3,15 +3,16 @@ package container
 import (
	"context"
	"fmt"
-	"github.com/docker/docker/api/types/mount"
-	"github.com/docker/docker/pkg/jsonmessage"
-	"github.com/dstackai/dstack/runner/internal/environment"
-	"github.com/dstackai/dstack/runner/internal/models"
	"io"
	"os/exec"
	"runtime"
	"strings"
 
+	"github.com/docker/docker/api/types/mount"
+	"github.com/docker/docker/pkg/jsonmessage"
+	"github.com/dstackai/dstack/runner/internal/environment"
+	"github.com/dstackai/dstack/runner/internal/models"
+
	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/filters"