From 7a611754677a04700395068eec2cfe46bd90d952 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Sun, 1 Jun 2025 00:29:47 +0200 Subject: [PATCH 01/13] [UX]: Improve the output of dstack ps #2655 (WIP) --- frontend/src/libs/run.ts | 57 ++++++++++++++++++- .../pages/Runs/Details/RunDetails/index.tsx | 6 +- .../Runs/List/hooks/useColumnsDefinitions.tsx | 6 +- src/dstack/_internal/cli/utils/run.py | 2 +- src/dstack/_internal/core/models/runs.py | 20 +++++++ 5 files changed, 83 insertions(+), 8 deletions(-) diff --git a/frontend/src/libs/run.ts b/frontend/src/libs/run.ts index 26f6986372..962d747b49 100644 --- a/frontend/src/libs/run.ts +++ b/frontend/src/libs/run.ts @@ -3,7 +3,8 @@ import { StatusIndicatorProps } from '@cloudscape-design/components'; import { IModelExtended } from '../pages/Models/List/types'; -export const getStatusIconType = (status: IRun['status']): StatusIndicatorProps['type'] => { +export const getStatusIconType = (status: IRun['status'] | TJobStatus): StatusIndicatorProps['type'] => { + console.log('status', status); switch (status) { case 'failed': return 'error'; @@ -25,6 +26,60 @@ export const getStatusIconType = (status: IRun['status']): StatusIndicatorProps[ } }; +export const getStatusIconColor = (status: IRun['status'] | TJobStatus, termination_reason: string | null | undefined): StatusIndicatorProps.Color | undefined => { + if (termination_reason === 'failed_to_start_due_to_no_capacity') { + return 'yellow'; + } + + switch (status) { + case 'aborted': + return 'yellow' + default: + return undefined; + } +}; + + +const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); + +export const getJobSubmissionStatus = (run: IRun): string => { + if (!run.latest_job_submission) { + return capitalize(run.status); + } + + const { status, termination_reason, exit_status } = run.latest_job_submission; + + if (status === 'done') { + return 'Exited (0)'; + } + + if (status === 'failed') { + switch (termination_reason) { + 
case 'container_exited_with_error': + return `Exited (${exit_status})`; + case 'failed_to_start_due_to_no_capacity': + return 'No offers'; + case 'interrupted_by_no_capacity': + return 'Interrupted'; + default: + return capitalize(status); + } + } + + if (status === 'terminated') { + switch (termination_reason) { + case 'terminated_by_user': + return 'Stopped'; + case 'aborted_by_user': + return 'Aborted'; + default: + return capitalize(status); + } + } + + return status; +}; + export const getExtendedModelFromRun = (run: IRun): IModelExtended | null => { if (!run?.service?.model) return null; diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index c1d9e0a799..d2726c9b76 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -7,7 +7,7 @@ import { format } from 'date-fns'; import { Box, ColumnLayout, Container, Header, Loader, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getStatusIconType } from 'libs/run'; +import { getJobSubmissionStatus, getStatusIconType, getStatusIconColor } from 'libs/run'; import { useGetRunQuery } from 'services/run'; import { @@ -82,8 +82,8 @@ export const RunDetails = () => {
{t('projects.run.status')}
- - {t(`projects.run.statuses.${runData.status}`)} + + {getJobSubmissionStatus(runData)}
diff --git a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx index e37a46cbf0..bc65f5c1aa 100644 --- a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx +++ b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx @@ -5,7 +5,7 @@ import { format } from 'date-fns'; import { NavigateLink, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getRepoNameFromRun, getStatusIconType } from 'libs/run'; +import { getRepoNameFromRun, getStatusIconType, getJobSubmissionStatus, getStatusIconColor } from 'libs/run'; import { ROUTES } from 'routes'; import { @@ -65,8 +65,8 @@ export const useColumnsDefinitions = () => { id: 'status', header: t('projects.run.status'), cell: (item: IRun) => ( - - {t(`projects.run.statuses.${item.status}`)} + + {getJobSubmissionStatus(item)} ), }, diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py index 79333c3b84..b6959fa9b3 100644 --- a/src/dstack/_internal/cli/utils/run.py +++ b/src/dstack/_internal/cli/utils/run.py @@ -183,7 +183,7 @@ def get_runs_table( status += f" (inactive for {inactive_for})" job_row: Dict[Union[str, int], Any] = { "NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}", - "STATUS": status, + "STATUS": latest_job_submission.pretty_repr, "SUBMITTED": format_date(latest_job_submission.submitted_at), "ERROR": _get_job_error(job), } diff --git a/src/dstack/_internal/core/models/runs.py b/src/dstack/_internal/core/models/runs.py index de8a284119..db3d9a3242 100644 --- a/src/dstack/_internal/core/models/runs.py +++ b/src/dstack/_internal/core/models/runs.py @@ -301,6 +301,26 @@ def duration(self) -> timedelta: end_time = self.finished_at return end_time - self.submitted_at + @property + def pretty_repr(self) -> str: + status = self.status.value + termination_reason = self.termination_reason + if status == JobStatus.DONE: + return "exited (0)" + 
elif status == JobStatus.FAILED: + if termination_reason == JobTerminationReason.CONTAINER_EXITED_WITH_ERROR: + return f"exited ({self.exit_status})" + elif termination_reason == JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY: + return "no offers" + elif termination_reason == JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY: + return "interrupted" + elif status == JobStatus.TERMINATED: + if termination_reason == JobTerminationReason.TERMINATED_BY_USER: + return "stopped" + elif termination_reason == JobTerminationReason.ABORTED_BY_USER: + return "aborted" + return status + class Job(CoreModel): job_spec: JobSpec From 80ca955ef4c76e70a3b47b06e53a9302121f8480 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Jun 2025 17:32:36 +0200 Subject: [PATCH 02/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- frontend/src/libs/run.ts | 53 +++----- .../Details/Jobs/Details/JobDetails/index.tsx | 10 +- .../pages/Runs/Details/Jobs/List/helpers.ts | 15 +++ .../pages/Runs/Details/Jobs/List/hooks.tsx | 14 +- .../pages/Runs/Details/RunDetails/index.tsx | 11 +- .../Runs/List/hooks/useColumnsDefinitions.tsx | 8 +- frontend/src/types/run.d.ts | 7 +- src/dstack/_internal/cli/utils/run.py | 8 +- src/dstack/_internal/core/models/runs.py | 125 +++++++++++------- 9 files changed, 144 insertions(+), 107 deletions(-) diff --git a/frontend/src/libs/run.ts b/frontend/src/libs/run.ts index 962d747b49..c7e35afe15 100644 --- a/frontend/src/libs/run.ts +++ b/frontend/src/libs/run.ts @@ -3,8 +3,10 @@ import { StatusIndicatorProps } from '@cloudscape-design/components'; import { IModelExtended } from '../pages/Models/List/types'; -export const getStatusIconType = (status: IRun['status'] | TJobStatus): StatusIndicatorProps['type'] => { - console.log('status', status); +export const getStatusIconType = (status: IRun['status'] | TJobStatus, termination_reason: string | null | undefined): StatusIndicatorProps['type'] => { + if (termination_reason === 
'interrupted_by_no_capacity') { + return 'stopped'; + } switch (status) { case 'failed': return 'error'; @@ -27,11 +29,15 @@ export const getStatusIconType = (status: IRun['status'] | TJobStatus): StatusIn }; export const getStatusIconColor = (status: IRun['status'] | TJobStatus, termination_reason: string | null | undefined): StatusIndicatorProps.Color | undefined => { - if (termination_reason === 'failed_to_start_due_to_no_capacity') { + if (termination_reason === 'failed_to_start_due_to_no_capacity' || + termination_reason === 'interrupted_by_no_capacity' + ) { return 'yellow'; } switch (status) { + case 'pulling': + return 'green' case 'aborted': return 'yellow' default: @@ -42,42 +48,17 @@ export const getStatusIconColor = (status: IRun['status'] | TJobStatus, terminat const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); -export const getJobSubmissionStatus = (run: IRun): string => { - if (!run.latest_job_submission) { +export const getRunStatusMessage = (run: IRun): string => { + if (run.latest_job_submission?.status_message) { + return capitalize(run.latest_job_submission.status_message); + } else { return capitalize(run.status); } +}; - const { status, termination_reason, exit_status } = run.latest_job_submission; - - if (status === 'done') { - return 'Exited (0)'; - } - - if (status === 'failed') { - switch (termination_reason) { - case 'container_exited_with_error': - return `Exited (${exit_status})`; - case 'failed_to_start_due_to_no_capacity': - return 'No offers'; - case 'interrupted_by_no_capacity': - return 'Interrupted'; - default: - return capitalize(status); - } - } - - if (status === 'terminated') { - switch (termination_reason) { - case 'terminated_by_user': - return 'Stopped'; - case 'aborted_by_user': - return 'Aborted'; - default: - return capitalize(status); - } - } - - return status; +export const getRunError = (run: IRun): string | null => { + const error = run.error ?? run.latest_job_submission?.error ?? 
null; + return error ? capitalize(error) : null; }; export const getExtendedModelFromRun = (run: IRun): IModelExtended | null => { diff --git a/frontend/src/pages/Runs/Details/Jobs/Details/JobDetails/index.tsx b/frontend/src/pages/Runs/Details/Jobs/Details/JobDetails/index.tsx index f826e9ad6e..424abb6e9f 100644 --- a/frontend/src/pages/Runs/Details/Jobs/Details/JobDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/Jobs/Details/JobDetails/index.tsx @@ -9,6 +9,7 @@ import { useGetRunQuery } from 'services/run'; import { Logs } from '../../../Logs'; import { + getJobError, getJobListItemBackend, getJobListItemInstance, getJobListItemPrice, @@ -16,6 +17,7 @@ import { getJobListItemResources, getJobListItemSpot, getJobStatus, + getJobStatusMessage, getJobSubmittedAt, getJobTerminationReason, } from '../../List/helpers'; @@ -67,15 +69,15 @@ export const JobDetails = () => {
{t('projects.run.status')}
- - {t(`projects.run.statuses.${getJobStatus(jobData)}`)} + + {getJobStatusMessage(jobData)}
- {t('projects.run.termination_reason')} -
{getJobTerminationReason(jobData)}
+ {t('projects.run.error')} +
{getJobError(jobData)}
diff --git a/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts b/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts index 1d8151e903..9c5c112e85 100644 --- a/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts +++ b/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts @@ -45,3 +45,18 @@ export const getJobStatus = (job: IJob) => { export const getJobTerminationReason = (job: IJob) => { return job.job_submissions?.[job.job_submissions.length - 1].termination_reason ?? '-'; }; + +const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); + +export const getJobStatusMessage = (job: IJob): string | null => { + const latest_submission = job.job_submissions?.[job.job_submissions.length - 1]; + if (latest_submission?.status_message) { + return capitalize(latest_submission.status_message); + } else { + return capitalize(latest_submission.status); + } +}; + +export const getJobError = (job: IJob): string | null => { + return job.job_submissions?.[job.job_submissions.length - 1]?.error ?? 
null; +}; diff --git a/frontend/src/pages/Runs/Details/Jobs/List/hooks.tsx b/frontend/src/pages/Runs/Details/Jobs/List/hooks.tsx index 677da83a61..35872bd475 100644 --- a/frontend/src/pages/Runs/Details/Jobs/List/hooks.tsx +++ b/frontend/src/pages/Runs/Details/Jobs/List/hooks.tsx @@ -7,6 +7,7 @@ import { getStatusIconType } from 'libs/run'; import { ROUTES } from 'routes'; import { + getJobError, getJobListItemBackend, getJobListItemInstance, getJobListItemPrice, @@ -14,7 +15,9 @@ import { getJobListItemResources, getJobListItemSpot, getJobStatus, + getJobStatusMessage, getJobSubmittedAt, + getJobTerminationReason, } from './helpers'; export const useColumnsDefinitions = ({ projectName, runId }: { projectName: string; runId: string }) => { @@ -43,13 +46,18 @@ export const useColumnsDefinitions = ({ projectName, runId }: { projectName: str cell: (item: IJob) => { const status = getJobStatus(item); - if (!status) return ''; - return ( - {t(`projects.run.statuses.${status}`)} + + {getJobStatusMessage(item)} + ); }, }, + { + id: 'error', + header: t('projects.run.error'), + cell: (item: IJob) => getJobError(item), + }, { id: 'resources', header: `${t('projects.run.resources')}`, diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index d2726c9b76..f3c8bf56a8 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -7,7 +7,7 @@ import { format } from 'date-fns'; import { Box, ColumnLayout, Container, Header, Loader, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getJobSubmissionStatus, getStatusIconType, getStatusIconColor } from 'libs/run'; +import { getRunStatusMessage, getStatusIconType, getStatusIconColor, getRunError } from 'libs/run'; import { useGetRunQuery } from 'services/run'; import { @@ -82,12 +82,17 @@ export const RunDetails = () => {
{t('projects.run.status')}
- - {getJobSubmissionStatus(runData)} + + {getRunStatusMessage(runData)}
+
+ {t('projects.run.error')} +
{getRunError(runData)}
+
+ {getRunListItemBackend(runData) && (
{t('projects.run.backend')} diff --git a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx index bc65f5c1aa..9e6d078c82 100644 --- a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx +++ b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx @@ -5,7 +5,7 @@ import { format } from 'date-fns'; import { NavigateLink, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getRepoNameFromRun, getStatusIconType, getJobSubmissionStatus, getStatusIconColor } from 'libs/run'; +import { getRepoNameFromRun, getStatusIconType, getRunStatusMessage, getRunError, getStatusIconColor } from 'libs/run'; import { ROUTES } from 'routes'; import { @@ -65,15 +65,15 @@ export const useColumnsDefinitions = () => { id: 'status', header: t('projects.run.status'), cell: (item: IRun) => ( - - {getJobSubmissionStatus(item)} + + {getRunStatusMessage(item)} ), }, { id: 'error', header: t('projects.run.error'), - cell: (item: IRun) => item.error ?? 
'-', + cell: (item: IRun) => getRunError(item), }, { id: 'cost', diff --git a/frontend/src/types/run.d.ts b/frontend/src/types/run.d.ts index ffb1d47dc3..31bcaddbaa 100644 --- a/frontend/src/types/run.d.ts +++ b/frontend/src/types/run.d.ts @@ -116,6 +116,9 @@ declare interface IJobSubmission { submitted_at: number; termination_reason?: string | null; termination_reason_message?: string | null; + exit_status?: number | null; + status_message?: string | null; + error?: string | null; } declare interface IJob { @@ -162,9 +165,9 @@ declare interface IRun { project_name: string; user: string; submitted_at: string; - terminated_at: string; + terminated_at: string | null; status: TJobStatus; - error?: string; + error?: string | null; jobs: IJob[]; run_spec: IRunSpec; latest_job_submission?: IJobSubmission; diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py index b6959fa9b3..e387a49663 100644 --- a/src/dstack/_internal/cli/utils/run.py +++ b/src/dstack/_internal/cli/utils/run.py @@ -163,14 +163,14 @@ def get_runs_table( table.add_column("ERROR", no_wrap=True, ratio=2) for run in runs: - run_error = _get_run_error(run) run = run._run # TODO(egor-s): make public attribute run_row: Dict[Union[str, int], Any] = { "NAME": run.run_spec.run_name, "SUBMITTED": format_date(run.submitted_at), - "ERROR": run_error, } + if run.error: + run_row["ERROR"] = run.error if len(run.jobs) != 1: run_row["STATUS"] = run.status add_row_from_dict(table, run_row) @@ -183,9 +183,9 @@ def get_runs_table( status += f" (inactive for {inactive_for})" job_row: Dict[Union[str, int], Any] = { "NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}", - "STATUS": latest_job_submission.pretty_repr, + "STATUS": latest_job_submission.status_message, "SUBMITTED": format_date(latest_job_submission.submitted_at), - "ERROR": _get_job_error(job), + "ERROR": latest_job_submission.error, } jpd = latest_job_submission.job_provisioning_data if jpd is not None: 
diff --git a/src/dstack/_internal/core/models/runs.py b/src/dstack/_internal/core/models/runs.py index db3d9a3242..47d5fda572 100644 --- a/src/dstack/_internal/core/models/runs.py +++ b/src/dstack/_internal/core/models/runs.py @@ -148,9 +148,6 @@ def to_status(self) -> JobStatus: } return mapping[self] - def pretty_repr(self) -> str: - return " ".join(self.value.split("_")).capitalize() - class Requirements(CoreModel): # TODO: Make requirements' fields required @@ -289,6 +286,9 @@ class JobSubmission(CoreModel): exit_status: Optional[int] job_provisioning_data: Optional[JobProvisioningData] job_runtime_data: Optional[JobRuntimeData] + # TODO: make status_message and error a computed field after migrating to pydanticV2 + status_message: Optional[str] + error: Optional[str] = None @property def age(self) -> timedelta: @@ -301,25 +301,80 @@ def duration(self) -> timedelta: end_time = self.finished_at return end_time - self.submitted_at - @property - def pretty_repr(self) -> str: - status = self.status.value - termination_reason = self.termination_reason + @root_validator + def _status_message(cls, values) -> Dict: + try: + status = values["status"] + termination_reason = values["termination_reason"] + exit_code = values["exit_status"] + except KeyError: + return values + values["status_message"] = JobSubmission._get_status_message( + status=status, + termination_reason=termination_reason, + exit_status=exit_code, + ) + return values + + @staticmethod + def _get_status_message( + status: JobStatus, + termination_reason: Optional[JobTerminationReason], + exit_status: Optional[int], + ) -> str: if status == JobStatus.DONE: return "exited (0)" elif status == JobStatus.FAILED: if termination_reason == JobTerminationReason.CONTAINER_EXITED_WITH_ERROR: - return f"exited ({self.exit_status})" + return f"exited ({exit_status})" elif termination_reason == JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY: return "no offers" elif termination_reason == 
JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY: return "interrupted" + else: + return "error" elif status == JobStatus.TERMINATED: if termination_reason == JobTerminationReason.TERMINATED_BY_USER: return "stopped" elif termination_reason == JobTerminationReason.ABORTED_BY_USER: return "aborted" - return status + return status.value + + @root_validator + def _error(cls, values) -> Dict: + try: + termination_reason = values["termination_reason"] + except KeyError: + return values + values["error"] = JobSubmission._get_error(termination_reason=termination_reason) + return values + + @staticmethod + def _get_error(termination_reason: Optional[JobTerminationReason]) -> Optional[str]: + if termination_reason == JobTerminationReason.INSTANCE_UNREACHABLE: + return "instance unreachable" + elif termination_reason == JobTerminationReason.WAITING_INSTANCE_LIMIT_EXCEEDED: + return "waiting instance limit exceeded" + elif termination_reason == JobTerminationReason.VOLUME_ERROR: + return "waiting runner limit exceeded" + elif termination_reason == JobTerminationReason.GATEWAY_ERROR: + return "gateway error" + elif termination_reason == JobTerminationReason.SCALED_DOWN: + return "scaled down" + elif termination_reason == JobTerminationReason.INACTIVITY_DURATION_EXCEEDED: + return "inactivity duration exceeded" + elif termination_reason == JobTerminationReason.TERMINATED_DUE_TO_UTILIZATION_POLICY: + return "utilization policy" + elif termination_reason == JobTerminationReason.PORTS_BINDING_FAILED: + return "ports binding failed" + elif termination_reason == JobTerminationReason.CREATING_CONTAINER_ERROR: + return "runner error" + elif termination_reason == JobTerminationReason.EXECUTOR_ERROR: + return "executor error" + elif termination_reason == JobTerminationReason.MAX_DURATION_EXCEEDED: + return "max duration exceeded" + else: + return None class Job(CoreModel): @@ -465,15 +520,20 @@ class Run(CoreModel): def _error(cls, values) -> Dict: try: termination_reason = 
values["termination_reason"] - jobs = values["jobs"] except KeyError: return values - values["error"] = _get_run_error( - run_termination_reason=termination_reason, - run_jobs=jobs, - ) + values["error"] = Run._get_error(termination_reason=termination_reason) return values + @staticmethod + def _get_error(termination_reason: Optional[RunTerminationReason]) -> Optional[str]: + if termination_reason == RunTerminationReason.RETRY_LIMIT_EXCEEDED: + return "retry limit exceeded" + elif termination_reason == RunTerminationReason.SERVER_ERROR: + return "server error" + else: + return None + class JobPlan(CoreModel): job_spec: JobSpec @@ -522,40 +582,3 @@ def get_policy_map(spot_policy: Optional[SpotPolicy], default: SpotPolicy) -> Op SpotPolicy.ONDEMAND: False, } return policy_map[spot_policy] - - -def _get_run_error( - run_termination_reason: Optional[RunTerminationReason], - run_jobs: List[Job], -) -> str: - if run_termination_reason is None: - return "" - if len(run_jobs) > 1: - return run_termination_reason.name - run_job_termination_reason, exit_status = _get_run_job_termination_reason_and_exit_status( - run_jobs - ) - # For failed runs, also show termination reason to provide more context. - # For other run statuses, the job termination reason will duplicate run status. 
- if run_job_termination_reason is not None and run_termination_reason in [ - RunTerminationReason.JOB_FAILED, - RunTerminationReason.SERVER_ERROR, - RunTerminationReason.RETRY_LIMIT_EXCEEDED, - ]: - if exit_status: - return ( - f"{run_termination_reason.name}\n({run_job_termination_reason.name} {exit_status})" - ) - return f"{run_termination_reason.name}\n({run_job_termination_reason.name})" - return run_termination_reason.name - - -def _get_run_job_termination_reason_and_exit_status( - run_jobs: List[Job], -) -> tuple[Optional[JobTerminationReason], Optional[int]]: - for job in run_jobs: - if len(job.job_submissions) > 0: - job_submission = job.job_submissions[-1] - if job_submission.termination_reason is not None: - return job_submission.termination_reason, job_submission.exit_status - return None, None From b47d81329ecc38b443db4aa9b3e05d833427a602 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Jun 2025 18:18:19 +0200 Subject: [PATCH 03/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- src/tests/_internal/server/routers/test_runs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tests/_internal/server/routers/test_runs.py b/src/tests/_internal/server/routers/test_runs.py index 760bbbb662..7265eae2a7 100644 --- a/src/tests/_internal/server/routers/test_runs.py +++ b/src/tests/_internal/server/routers/test_runs.py @@ -381,8 +381,10 @@ def get_dev_env_run_dict( "finished_at": finished_at, "inactivity_secs": None, "status": "submitted", + "status_message": "submitted", "termination_reason": None, "termination_reason_message": None, + "error": None, "exit_status": None, "job_provisioning_data": None, "job_runtime_data": None, @@ -398,8 +400,10 @@ def get_dev_env_run_dict( "inactivity_secs": None, "finished_at": finished_at, "status": "submitted", + "status_message": "submitted", "termination_reason": None, "termination_reason_message": None, + "error": None, "exit_status": None, "job_provisioning_data": 
None, "job_runtime_data": None, @@ -407,7 +411,7 @@ def get_dev_env_run_dict( "cost": 0.0, "service": None, "termination_reason": None, - "error": "", + "error": None, "deleted": deleted, } @@ -530,6 +534,7 @@ async def test_lists_runs(self, test_db, session: AsyncSession, client: AsyncCli "status": "submitted", "termination_reason_message": None, "termination_reason": None, + "error": None, "exit_status": None, "job_provisioning_data": None, "job_runtime_data": None, From 05e426c4fc2847d5657a651912588fdd4acb01d7 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Jun 2025 18:24:57 +0200 Subject: [PATCH 04/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- src/tests/_internal/server/routers/test_runs.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tests/_internal/server/routers/test_runs.py b/src/tests/_internal/server/routers/test_runs.py index 7265eae2a7..19ebd367e9 100644 --- a/src/tests/_internal/server/routers/test_runs.py +++ b/src/tests/_internal/server/routers/test_runs.py @@ -515,8 +515,10 @@ async def test_lists_runs(self, test_db, session: AsyncSession, client: AsyncCli "finished_at": None, "inactivity_secs": None, "status": "submitted", + "status_message": "submitted", "termination_reason": None, "termination_reason_message": None, + "error": None, "exit_status": None, "job_provisioning_data": None, "job_runtime_data": None, @@ -532,6 +534,7 @@ async def test_lists_runs(self, test_db, session: AsyncSession, client: AsyncCli "finished_at": None, "inactivity_secs": None, "status": "submitted", + "status_message": "submitted", "termination_reason_message": None, "termination_reason": None, "error": None, @@ -542,7 +545,7 @@ async def test_lists_runs(self, test_db, session: AsyncSession, client: AsyncCli "cost": 0, "service": None, "termination_reason": None, - "error": "", + "error": None, "deleted": False, }, { @@ -558,7 +561,7 @@ async def test_lists_runs(self, test_db, session: AsyncSession, 
client: AsyncCli "cost": 0, "service": None, "termination_reason": None, - "error": "", + "error": None, "deleted": False, }, ] From e01e135f91a1dfa2e92bf531f846f4575cdba120 Mon Sep 17 00:00:00 2001 From: Oleg Vavilov Date: Mon, 2 Jun 2025 22:02:46 +0300 Subject: [PATCH 05/13] #2716 fixes after review --- .pre-commit-config.yaml | 4 +-- frontend/src/libs/index.ts | 2 ++ frontend/src/libs/run.ts | 27 +++++++------- .../Project/Backends/Table/constants.tsx | 35 +++++++++++++++---- .../Project/Backends/YAMLForm/constants.tsx | 5 ++- .../Project/Details/Settings/constants.tsx | 11 ++++-- .../Project/Gateways/Table/constants.tsx | 8 ++++- .../pages/Runs/Details/Jobs/List/helpers.ts | 3 +- .../pages/Runs/Details/RunDetails/index.tsx | 12 +++++-- .../Runs/List/hooks/useColumnsDefinitions.tsx | 20 +++++++---- 10 files changed, 91 insertions(+), 36 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6ba9d9b410..8326be1350 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,14 +19,14 @@ repos: - id: end-of-file-fixer - repo: https://github.com/pre-commit/mirrors-eslint - rev: '' + rev: 'v9.28.0' hooks: - id: eslint name: ESLint entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore language: node pass_filenames: false - cwd: "frontend/" + cwd: frontend additional_dependencies: - eslint@8.31.0 - eslint-config-prettier@8.10.0 diff --git a/frontend/src/libs/index.ts b/frontend/src/libs/index.ts index 50a163db06..85b8e6ab96 100644 --- a/frontend/src/libs/index.ts +++ b/frontend/src/libs/index.ts @@ -108,3 +108,5 @@ export const isValidUrl = (urlString: string) => { export const includeSubString = (value: string, query: string) => { return value.toLowerCase().includes(query.trim().toLowerCase()); }; + +export const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); diff --git a/frontend/src/libs/run.ts b/frontend/src/libs/run.ts index c7e35afe15..c0a036f2f2 100644 --- 
a/frontend/src/libs/run.ts +++ b/frontend/src/libs/run.ts @@ -1,10 +1,15 @@ import { get as _get } from 'lodash'; import { StatusIndicatorProps } from '@cloudscape-design/components'; +import { capitalize } from 'libs'; + import { IModelExtended } from '../pages/Models/List/types'; -export const getStatusIconType = (status: IRun['status'] | TJobStatus, termination_reason: string | null | undefined): StatusIndicatorProps['type'] => { - if (termination_reason === 'interrupted_by_no_capacity') { +export const getStatusIconType = ( + status: IRun['status'] | TJobStatus, + terminationReason: string | null | undefined, +): StatusIndicatorProps['type'] => { + if (terminationReason === 'interrupted_by_no_capacity') { return 'stopped'; } switch (status) { @@ -28,26 +33,24 @@ export const getStatusIconType = (status: IRun['status'] | TJobStatus, terminati } }; -export const getStatusIconColor = (status: IRun['status'] | TJobStatus, termination_reason: string | null | undefined): StatusIndicatorProps.Color | undefined => { - if (termination_reason === 'failed_to_start_due_to_no_capacity' || - termination_reason === 'interrupted_by_no_capacity' - ) { +export const getStatusIconColor = ( + status: IRun['status'] | TJobStatus, + terminationReason: string | null | undefined, +): StatusIndicatorProps.Color | undefined => { + if (terminationReason === 'failed_to_start_due_to_no_capacity' || terminationReason === 'interrupted_by_no_capacity') { return 'yellow'; } switch (status) { case 'pulling': - return 'green' + return 'green'; case 'aborted': - return 'yellow' + return 'yellow'; default: return undefined; } }; - -const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); - export const getRunStatusMessage = (run: IRun): string => { if (run.latest_job_submission?.status_message) { return capitalize(run.latest_job_submission.status_message); @@ -56,7 +59,7 @@ export const getRunStatusMessage = (run: IRun): string => { } }; -export const getRunError = 
(run: IRun): string | null => { +export const getRunError = (run: IRun): string | null => { const error = run.error ?? run.latest_job_submission?.error ?? null; return error ? capitalize(error) : null; }; diff --git a/frontend/src/pages/Project/Backends/Table/constants.tsx b/frontend/src/pages/Project/Backends/Table/constants.tsx index ac23a6881c..0d9fb99128 100644 --- a/frontend/src/pages/Project/Backends/Table/constants.tsx +++ b/frontend/src/pages/Project/Backends/Table/constants.tsx @@ -4,13 +4,26 @@ export const BACKENDS_HELP_SKY = { header:

Backends

, body: ( <> -

To use dstack with cloud providers, you have to configure backends.

+

+ To use dstack with cloud providers, you have to configure backends. +

Marketplace

-

By default, dstack Sky includes a preset of backends that let you access compute from the {' '} - dstack marketplace and pay through your dstack Sky user billing.

+

+ By default, dstack Sky includes a preset of backends that let you access compute from the{' '} + dstack marketplace and pay through your dstack Sky user billing. +

Your own cloud accounts

-

You can also configure custom backends to use your own cloud providers, either instead of or in addition to the default ones.

-

See the documentation for the list of supported backends.

+

+ You can also configure custom backends to use your own cloud providers, either instead of or in addition to the + default ones. +

+

+ See the{' '} + + documentation + {' '} + for the list of supported backends. +

), }; @@ -19,8 +32,16 @@ export const BACKENDS_HELP_ENTERPRISE = { header:

Backends

, body: ( <> -

To use dstack with cloud providers, you have to configure backends.

-

See the documentation for the list of supported backends.

+

+ To use dstack with cloud providers, you have to configure backends. +

+

+ See the{' '} + + documentation + {' '} + for the list of supported backends. +

), }; diff --git a/frontend/src/pages/Project/Backends/YAMLForm/constants.tsx b/frontend/src/pages/Project/Backends/YAMLForm/constants.tsx index 9d47cfc830..a139f137d5 100644 --- a/frontend/src/pages/Project/Backends/YAMLForm/constants.tsx +++ b/frontend/src/pages/Project/Backends/YAMLForm/constants.tsx @@ -68,7 +68,10 @@ export const CONFIG_YAML_HELP_ENTERPRISE = {

Each backend type may support different properties. See the{' '} - documentaiton for more examples. + + documentation + {' '} + for more examples.

), diff --git a/frontend/src/pages/Project/Details/Settings/constants.tsx b/frontend/src/pages/Project/Details/Settings/constants.tsx index 8963724323..4b3acefce4 100644 --- a/frontend/src/pages/Project/Details/Settings/constants.tsx +++ b/frontend/src/pages/Project/Details/Settings/constants.tsx @@ -6,12 +6,17 @@ export const CLI_INFO = { <>

To use this project with your CLI, add it using the - - dstack project add command. + + dstack project add + {' '} + command.

To learn how to install the CLI, refer to the{' '} - installation guide. + + installation + {' '} + guide.

), diff --git a/frontend/src/pages/Project/Gateways/Table/constants.tsx b/frontend/src/pages/Project/Gateways/Table/constants.tsx index 9b03b807b8..53ec3b15ae 100644 --- a/frontend/src/pages/Project/Gateways/Table/constants.tsx +++ b/frontend/src/pages/Project/Gateways/Table/constants.tsx @@ -5,7 +5,13 @@ export const GATEWAYS_INFO = { body: ( <>

Gateways manage the ingress traffic for running services.

-

To learn more about gateways, see the documentation.

+

+ To learn more about gateways, see the{' '} + + documentation + + . +

), }; diff --git a/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts b/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts index 9c5c112e85..083d80e167 100644 --- a/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts +++ b/frontend/src/pages/Runs/Details/Jobs/List/helpers.ts @@ -1,6 +1,7 @@ import { format } from 'date-fns'; import { DATE_TIME_FORMAT } from 'consts'; +import { capitalize } from 'libs'; export const getJobListItemResources = (job: IJob) => { return job.job_submissions?.[job.job_submissions.length - 1]?.job_provisioning_data?.instance_type?.resources?.description; @@ -46,8 +47,6 @@ export const getJobTerminationReason = (job: IJob) => { return job.job_submissions?.[job.job_submissions.length - 1].termination_reason ?? '-'; }; -const capitalize = (str: string): string => str.charAt(0).toUpperCase() + str.slice(1); - export const getJobStatusMessage = (job: IJob): string | null => { const latest_submission = job.job_submissions?.[job.job_submissions.length - 1]; if (latest_submission?.status_message) { diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index f3c8bf56a8..2b58c185ff 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -7,7 +7,7 @@ import { format } from 'date-fns'; import { Box, ColumnLayout, Container, Header, Loader, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getRunStatusMessage, getStatusIconType, getStatusIconColor, getRunError } from 'libs/run'; +import { getRunError, getRunStatusMessage, getStatusIconColor, getStatusIconType } from 'libs/run'; import { useGetRunQuery } from 'services/run'; import { @@ -47,6 +47,9 @@ export const RunDetails = () => { if (!runData) return null; + const status = runData.latest_job_submission?.status ?? 
runData.status; + const terminationReason = runData.latest_job_submission?.termination_reason; + return ( <> {t('common.general')}}> @@ -82,7 +85,12 @@ export const RunDetails = () => {
{t('projects.run.status')}
- + {getRunStatusMessage(runData)}
diff --git a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx index 9e6d078c82..244c1b07bf 100644 --- a/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx +++ b/frontend/src/pages/Runs/List/hooks/useColumnsDefinitions.tsx @@ -5,7 +5,7 @@ import { format } from 'date-fns'; import { NavigateLink, StatusIndicator } from 'components'; import { DATE_TIME_FORMAT } from 'consts'; -import { getRepoNameFromRun, getStatusIconType, getRunStatusMessage, getRunError, getStatusIconColor } from 'libs/run'; +import { getRepoNameFromRun, getRunError, getRunStatusMessage, getStatusIconColor, getStatusIconType } from 'libs/run'; import { ROUTES } from 'routes'; import { @@ -64,11 +64,19 @@ export const useColumnsDefinitions = () => { { id: 'status', header: t('projects.run.status'), - cell: (item: IRun) => ( - - {getRunStatusMessage(item)} - - ), + cell: (item: IRun) => { + const status = item.latest_job_submission?.status ?? 
item.status; + const terminationReason = item.latest_job_submission?.termination_reason; + + return ( + + {getRunStatusMessage(item)} + + ); + }, }, { id: 'error', From b5d93291a011be9c33e2199ed3f94e083a3244d3 Mon Sep 17 00:00:00 2001 From: Oleg Vavilov Date: Mon, 2 Jun 2025 22:07:16 +0300 Subject: [PATCH 06/13] #2716 fixes after review --- .pre-commit-config.yaml | 2 +- frontend/src/pages/Runs/Details/RunDetails/index.tsx | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8326be1350..885b1d61fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - id: end-of-file-fixer - repo: https://github.com/pre-commit/mirrors-eslint - rev: 'v9.28.0' + rev: 'v8.31.0' hooks: - id: eslint name: ESLint diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index 2b58c185ff..c1a307b4b5 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -88,7 +88,8 @@ export const RunDetails = () => { {getRunStatusMessage(runData)} From d2668baca6ba02fef0ed2a7dfdf2d84223e69fb9 Mon Sep 17 00:00:00 2001 From: Oleg Vavilov Date: Mon, 2 Jun 2025 22:10:04 +0300 Subject: [PATCH 07/13] #2716 fixes after review --- .pre-commit-config.yaml | 3 +-- frontend/src/pages/Runs/Details/RunDetails/index.tsx | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 885b1d61fa..f2c27b15e8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,10 +23,9 @@ repos: hooks: - id: eslint name: ESLint - entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore + entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore frontend/ language: node pass_filenames: false - cwd: frontend additional_dependencies: - eslint@8.31.0 - 
eslint-config-prettier@8.10.0 diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index c1a307b4b5..f52bb4c54e 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -90,6 +90,7 @@ export const RunDetails = () => { colorOverride={ getStatusIconColor(status, terminationReason) + } > {getRunStatusMessage(runData)} From a88e9ac92c191799ef544912c9df989d18337a03 Mon Sep 17 00:00:00 2001 From: Oleg Vavilov Date: Tue, 3 Jun 2025 10:27:10 +0300 Subject: [PATCH 08/13] #2716 fixes after review --- .pre-commit-config.yaml | 3 ++- frontend/src/pages/Runs/Details/RunDetails/index.tsx | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f2c27b15e8..09287dbfd5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,9 +23,10 @@ repos: hooks: - id: eslint name: ESLint - entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore frontend/ + entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore language: node pass_filenames: false + cwd: "frontend/" additional_dependencies: - eslint@8.31.0 - eslint-config-prettier@8.10.0 diff --git a/frontend/src/pages/Runs/Details/RunDetails/index.tsx b/frontend/src/pages/Runs/Details/RunDetails/index.tsx index f52bb4c54e..b336257d4a 100644 --- a/frontend/src/pages/Runs/Details/RunDetails/index.tsx +++ b/frontend/src/pages/Runs/Details/RunDetails/index.tsx @@ -87,11 +87,7 @@ export const RunDetails = () => {
{getRunStatusMessage(runData)} From 2c850115135063f46368ec8e5a7d86423da8db5c Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Tue, 3 Jun 2025 09:57:38 +0200 Subject: [PATCH 09/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- src/dstack/_internal/cli/utils/run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py index e387a49663..f1ff12a19e 100644 --- a/src/dstack/_internal/cli/utils/run.py +++ b/src/dstack/_internal/cli/utils/run.py @@ -154,8 +154,7 @@ def get_runs_table( table.add_column("BACKEND", style="grey58", ratio=2) table.add_column("RESOURCES", ratio=3 if not verbose else 2) if verbose: - table.add_column("INSTANCE", no_wrap=True, ratio=1) - table.add_column("RESERVATION", no_wrap=True, ratio=1) + table.add_column("INSTANCE TYPE", no_wrap=True, ratio=1) table.add_column("PRICE", style="grey58", ratio=1) table.add_column("STATUS", no_wrap=True, ratio=1) table.add_column("SUBMITTED", style="grey58", no_wrap=True, ratio=1) @@ -190,18 +189,19 @@ def get_runs_table( jpd = latest_job_submission.job_provisioning_data if jpd is not None: resources = jpd.instance_type.resources - instance = jpd.instance_type.name + instance_type = jpd.instance_type.name jrd = latest_job_submission.job_runtime_data if jrd is not None and jrd.offer is not None: resources = jrd.offer.instance.resources if jrd.offer.total_blocks > 1: - instance += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})" + instance_type += f" ({jrd.offer.blocks}/{jrd.offer.total_blocks})" + if jpd.reservation: + instance_type += f" ({jpd.reservation})" job_row.update( { "BACKEND": f"{jpd.backend.value.replace('remote', 'ssh')} ({jpd.region})", "RESOURCES": resources.pretty_format(include_spot=True), - "INSTANCE": instance, - "RESERVATION": jpd.reservation, + "INSTANCE TYPE": instance_type, "PRICE": f"${jpd.price:.4f}".rstrip("0").rstrip("."), } ) From 
122d1e287a91d1f3c184ea63ee84596a63aee5b7 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Tue, 3 Jun 2025 10:02:40 +0200 Subject: [PATCH 10/13] [UX]: Make run status in UI and CLI easier to understand #2716 Commented ESLint (until warnings are fixed) --- .pre-commit-config.yaml | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09287dbfd5..f0688f645a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,20 +18,21 @@ repos: hooks: - id: end-of-file-fixer - - repo: https://github.com/pre-commit/mirrors-eslint - rev: 'v8.31.0' - hooks: - - id: eslint - name: ESLint - entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore - language: node - pass_filenames: false - cwd: "frontend/" - additional_dependencies: - - eslint@8.31.0 - - eslint-config-prettier@8.10.0 - - eslint-plugin-i18n@2.4.0 - - eslint-plugin-prettier@4.2.1 - - eslint-plugin-simple-import-sort@10.0.0 - - '@typescript-eslint/eslint-plugin@5.48.1' - - '@typescript-eslint/parser@5.48.1' + # TODO(oleg): fix ESLint warnings + # - repo: https://github.com/pre-commit/mirrors-eslint + # rev: 'v8.31.0' + # hooks: + # - id: eslint + # name: ESLint + # entry: eslint --fix --config frontend/.eslintrc --ignore-path frontend/.eslintignore + # language: node + # pass_filenames: false + # cwd: "frontend/" + # additional_dependencies: + # - eslint@8.31.0 + # - eslint-config-prettier@8.10.0 + # - eslint-plugin-i18n@2.4.0 + # - eslint-plugin-prettier@4.2.1 + # - eslint-plugin-simple-import-sort@10.0.0 + # - '@typescript-eslint/eslint-plugin@5.48.1' + # - '@typescript-eslint/parser@5.48.1' From 9ce73d10db6363d2409759fe2075c34a5144557c Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Tue, 3 Jun 2025 11:03:51 +0200 Subject: [PATCH 11/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- .../cli/services/configurators/run.py | 75 ++++++++----------- 1
file changed, 31 insertions(+), 44 deletions(-) diff --git a/src/dstack/_internal/cli/services/configurators/run.py b/src/dstack/_internal/cli/services/configurators/run.py index 5e9440213a..e70249d742 100644 --- a/src/dstack/_internal/cli/services/configurators/run.py +++ b/src/dstack/_internal/cli/services/configurators/run.py @@ -3,7 +3,7 @@ import sys import time from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set import gpuhunt from pydantic import parse_obj_as @@ -41,7 +41,7 @@ ) from dstack._internal.core.models.repos.base import Repo from dstack._internal.core.models.resources import CPUSpec -from dstack._internal.core.models.runs import JobSubmission, JobTerminationReason, RunStatus +from dstack._internal.core.models.runs import JobSubmission, RunStatus from dstack._internal.core.services.configs import ConfigManager from dstack._internal.core.services.diff import diff_models from dstack._internal.utils.common import local_time @@ -553,35 +553,38 @@ def _print_service_urls(run: Run) -> None: def print_finished_message(run: Run): + status_message = ( + run._run.latest_job_submission.status_message + if run._run.latest_job_submission + else run._run.status_message + ) + error = ( + run._run.latest_job_submission.error if run._run.latest_job_submission else run._run.error + ) + termination_reason = ( + run._run.latest_job_submission.termination_reason + if run._run.latest_job_submission + else None + ) + termination_reason_message = ( + run._run.latest_job_submission.termination_reason_message + if run._run.latest_job_submission + else None + ) if run.status == RunStatus.DONE: - console.print("[code]Done[/]") + console.print(f"[code]{status_message.capitalize()}[/code]") return + else: + str = f"[error]{status_message.capitalize()}[/error]" + if error: + str += f" ([error]{error.capitalize()}[/error])" + console.print(str) - termination_reason, termination_reason_message, exit_status = ( 
- _get_run_termination_reason_and_exit_status(run) - ) - message = "Run failed due to unknown reason. Check CLI, server, and run logs." - if run.status == RunStatus.TERMINATED: - message = "Run terminated due to unknown reason. Check CLI, server, and run logs." - - if termination_reason == JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY: - message = ( - "All provisioning attempts failed. " - "This is likely due to cloud providers not having enough capacity. " - "Check CLI and server logs for more details." - ) - elif termination_reason is not None: - exit_status_details = f"Exit status: {exit_status}.\n" if exit_status else "" - error_details = ( - f"Error: {termination_reason_message}\n" if termination_reason_message else "" - ) - message = ( - f"Run failed with error code {termination_reason.name}.\n" - f"{exit_status_details}" - f"{error_details}" - f"Check [bold]dstack logs -d {run.name}[/bold] for more details." - ) - console.print(f"[error]{message}[/]") + if termination_reason_message: + console.print(f"[error]{termination_reason_message}[/error]") + + if termination_reason: + console.print(f"Check [code]dstack logs -d {run.name}[/code] for more details.") def get_run_exit_code(run: Run) -> int: @@ -590,22 +593,6 @@ def get_run_exit_code(run: Run) -> int: return 1 -def _get_run_termination_reason_and_exit_status( - run: Run, -) -> Tuple[Optional[JobTerminationReason], Optional[str], Optional[int]]: - if len(run._run.jobs) == 0: - return None, None, None - job = run._run.jobs[0] - if len(job.job_submissions) == 0: - return None, None, None - job_submission = job.job_submissions[0] - return ( - job_submission.termination_reason, - job_submission.termination_reason_message, - job_submission.exit_status, - ) - - def _run_resubmitted(run: Run, current_job_submission: Optional[JobSubmission]) -> bool: if current_job_submission is None or run._run.latest_job_submission is None: return False From 00bb4b3b59ae7bc9c20539fa54afcde53b1f8f2e Mon Sep 17 00:00:00 
2001 From: peterschmidt85 Date: Tue, 3 Jun 2025 12:27:30 +0200 Subject: [PATCH 12/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- src/dstack/_internal/core/models/runs.py | 38 ++++++++------------ src/tests/_internal/core/models/test_runs.py | 36 +++++++++++++++++++ 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/dstack/_internal/core/models/runs.py b/src/dstack/_internal/core/models/runs.py index 47d5fda572..51bee78fc2 100644 --- a/src/dstack/_internal/core/models/runs.py +++ b/src/dstack/_internal/core/models/runs.py @@ -351,30 +351,20 @@ def _error(cls, values) -> Dict: @staticmethod def _get_error(termination_reason: Optional[JobTerminationReason]) -> Optional[str]: - if termination_reason == JobTerminationReason.INSTANCE_UNREACHABLE: - return "instance unreachable" - elif termination_reason == JobTerminationReason.WAITING_INSTANCE_LIMIT_EXCEEDED: - return "waiting instance limit exceeded" - elif termination_reason == JobTerminationReason.VOLUME_ERROR: - return "waiting runner limit exceeded" - elif termination_reason == JobTerminationReason.GATEWAY_ERROR: - return "gateway error" - elif termination_reason == JobTerminationReason.SCALED_DOWN: - return "scaled down" - elif termination_reason == JobTerminationReason.INACTIVITY_DURATION_EXCEEDED: - return "inactivity duration exceeded" - elif termination_reason == JobTerminationReason.TERMINATED_DUE_TO_UTILIZATION_POLICY: - return "utilization policy" - elif termination_reason == JobTerminationReason.PORTS_BINDING_FAILED: - return "ports binding failed" - elif termination_reason == JobTerminationReason.CREATING_CONTAINER_ERROR: - return "runner error" - elif termination_reason == JobTerminationReason.EXECUTOR_ERROR: - return "executor error" - elif termination_reason == JobTerminationReason.MAX_DURATION_EXCEEDED: - return "max duration exceeded" - else: - return None + error_mapping = { + JobTerminationReason.INSTANCE_UNREACHABLE: "instance unreachable", + 
JobTerminationReason.WAITING_INSTANCE_LIMIT_EXCEEDED: "waiting instance limit exceeded", + JobTerminationReason.VOLUME_ERROR: "waiting runner limit exceeded", + JobTerminationReason.GATEWAY_ERROR: "gateway error", + JobTerminationReason.SCALED_DOWN: "scaled down", + JobTerminationReason.INACTIVITY_DURATION_EXCEEDED: "inactivity duration exceeded", + JobTerminationReason.TERMINATED_DUE_TO_UTILIZATION_POLICY: "utilization policy", + JobTerminationReason.PORTS_BINDING_FAILED: "ports binding failed", + JobTerminationReason.CREATING_CONTAINER_ERROR: "runner error", + JobTerminationReason.EXECUTOR_ERROR: "executor error", + JobTerminationReason.MAX_DURATION_EXCEEDED: "max duration exceeded", + } + return error_mapping.get(termination_reason) class Job(CoreModel): diff --git a/src/tests/_internal/core/models/test_runs.py b/src/tests/_internal/core/models/test_runs.py index 576eba61a1..fc7802ee99 100644 --- a/src/tests/_internal/core/models/test_runs.py +++ b/src/tests/_internal/core/models/test_runs.py @@ -1,6 +1,8 @@ from dstack._internal.core.models.runs import ( JobStatus, + JobSubmission, JobTerminationReason, + Run, RunStatus, RunTerminationReason, ) @@ -22,3 +24,37 @@ def test_job_termination_reason_to_status_works_with_all_enum_varians(): for job_termination_reason in JobTerminationReason: job_status = job_termination_reason.to_status() assert isinstance(job_status, JobStatus) + + +# Will fail if JobTerminationReason value is added without updating JobSubmission._get_error +def test_get_error_returns_expected_messages(): + no_error_reasons = [ + JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY, + JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY, + JobTerminationReason.WAITING_RUNNER_LIMIT_EXCEEDED, + JobTerminationReason.TERMINATED_BY_USER, + JobTerminationReason.DONE_BY_RUNNER, + JobTerminationReason.ABORTED_BY_USER, + JobTerminationReason.TERMINATED_BY_SERVER, + JobTerminationReason.CONTAINER_EXITED_WITH_ERROR, + ] + + for reason in JobTerminationReason: + 
if JobSubmission._get_error(reason) is None: + # Fail if a no-error reason is not in the list + assert reason in no_error_reasons + + +# Will fail if RunTerminationReason value is added without updating Run._get_error +def test_run_get_error_returns_none_for_specific_reasons(): + no_error_reasons = [ + RunTerminationReason.ALL_JOBS_DONE, + RunTerminationReason.JOB_FAILED, + RunTerminationReason.STOPPED_BY_USER, + RunTerminationReason.ABORTED_BY_USER, + ] + + for reason in RunTerminationReason: + if Run._get_error(reason) is None: + # Fail if a no-error reason is not in the list + assert reason in no_error_reasons From 688779775dde2be1a7e090898ce675c7ecf9ec83 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Tue, 3 Jun 2025 12:42:46 +0200 Subject: [PATCH 13/13] [UX]: Make run status in UI and CLI easier to understand #2716 --- src/dstack/_internal/cli/utils/run.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py index f1ff12a19e..d05f63f610 100644 --- a/src/dstack/_internal/cli/utils/run.py +++ b/src/dstack/_internal/cli/utils/run.py @@ -12,7 +12,6 @@ TerminationPolicy, ) from dstack._internal.core.models.runs import ( - Job, RunPlan, ) from dstack._internal.core.services.profiles import get_termination @@ -211,18 +210,3 @@ def get_runs_table( add_row_from_dict(table, job_row, style="secondary" if len(run.jobs) != 1 else None) return table - - -def _get_run_error(run: Run) -> str: - return run._run.error or "" - - -def _get_job_error(job: Job) -> str: - job_submission = job.job_submissions[-1] - termination_reason = job_submission.termination_reason - exit_status = job_submission.exit_status - if termination_reason is None: - return "" - if exit_status: - return f"{termination_reason.name} {exit_status}" - return termination_reason.name