diff --git a/src/dstack/_internal/server/background/tasks/process_pools.py b/src/dstack/_internal/server/background/tasks/process_pools.py index affea4aaf..43d1864e4 100644 --- a/src/dstack/_internal/server/background/tasks/process_pools.py +++ b/src/dstack/_internal/server/background/tasks/process_pools.py @@ -80,14 +80,19 @@ async def check_shim(instance_id: UUID) -> None: instance_health = instance_healthcheck(ssh_private_key, job_provisioning_data) - logger.info("check instance %s status: %s", instance.name, instance_health) + logger.debug("check instance %s status: shim health is %s", instance.name, instance_health) if instance_health: if instance.status in (InstanceStatus.CREATING, InstanceStatus.STARTING): - instance.status = InstanceStatus.READY + instance.status = ( + InstanceStatus.READY if instance.job_id is None else InstanceStatus.BUSY + ) await session.commit() else: if instance.status in (InstanceStatus.READY, InstanceStatus.BUSY): + logger.warning( + "instance %s shim is not available, marked as failed", instance.name + ) instance.status = InstanceStatus.FAILED await session.commit() diff --git a/src/dstack/_internal/server/background/tasks/process_submitted_jobs.py b/src/dstack/_internal/server/background/tasks/process_submitted_jobs.py index 4e8fdb9d5..415c4ccb0 100644 --- a/src/dstack/_internal/server/background/tasks/process_submitted_jobs.py +++ b/src/dstack/_internal/server/background/tasks/process_submitted_jobs.py @@ -178,7 +178,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel): pool=pool, created_at=common_utils.get_current_datetime(), started_at=common_utils.get_current_datetime(), - status=InstanceStatus.BUSY, + status=InstanceStatus.STARTING, job_provisioning_data=job_provisioning_data.json(), offer=offer.json(), termination_policy=profile.termination_policy, diff --git a/src/dstack/_internal/server/services/gateways/__init__.py b/src/dstack/_internal/server/services/gateways/__init__.py index 908233d00..18915131e 100644 --- a/src/dstack/_internal/server/services/gateways/__init__.py +++ b/src/dstack/_internal/server/services/gateways/__init__.py @@ -314,6 +314,7 @@ async def init_gateways(session: AsyncSession): ) gateway_computes = res.scalars().all() + logger.debug(f"Connecting to {len(gateway_computes)} gateways...") for gateway, error in await gather_map_async( gateway_computes, lambda g: gateway_connections_pool.add(g.ip_address, g.ssh_private_key),