2 changes: 1 addition & 1 deletion README.md
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
#### Other commands

* `batch-launch`: Launch multiple model inference servers at once; currently only single-node models are supported.
* `status`: Check the model status by providing its Slurm job ID.
* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
* `metrics`: Streams performance metrics to the console.
* `shutdown`: Shut down a model by providing its Slurm job ID.
* `list`: List all available model names, or view the default/cached configuration of a specific model.
53 changes: 35 additions & 18 deletions docs/user_guide.md
@@ -149,35 +149,52 @@ Since batch launches use heterogeneous jobs, users can request different partitions

### `status` command

You can check the inference server status by providing the Slurm job ID to the `status` command:
You can check the status of all inference servers launched through `vec-inf` by running the `status` command:
```bash
vec-inf status
```

You should see an output like this:
```
┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Job ID    ┃ Model Name ┃ Status  ┃ Base URL              ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
│ 1434429   │ Qwen3-8B   │ READY   │ http://gpu113:8080/v1 │
│ 1434584   │ Qwen3-14B  │ READY   │ http://gpu053:8080/v1 │
│ 1435035+0 │ Qwen3-32B  │ PENDING │ UNAVAILABLE           │
│ 1435035+1 │ Qwen3-14B  │ PENDING │ UNAVAILABLE           │
└───────────┴────────────┴─────────┴───────────────────────┘
```
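
The same information is available programmatically through the Python client that backs the CLI. Below is a minimal sketch; it assumes `VecInfClient` is importable from `vec_inf.client` and exposes the `fetch_running_jobs()` and `get_status()` methods that the CLI tests exercise:

```python
# A minimal sketch: list the status of all running vec-inf jobs.
# Assumes VecInfClient lives at vec_inf.client (import path may differ)
# and provides fetch_running_jobs() and get_status(job_id).
from vec_inf.client import VecInfClient

client = VecInfClient()
for job_id in client.fetch_running_jobs():
    status = client.get_status(job_id)
    base_url = status.base_url or "UNAVAILABLE"
    print(f"{job_id}: {status.model_name} [{status.server_status}] {base_url}")
```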

If you want to check why a specific job is pending or failing, append the job ID to the `status` command:

```bash
vec-inf status 15373800
vec-inf status 1435035+1
```

If the server is still waiting for resources, you should see an output like this:

```
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Job Status     ┃ Value                      ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Model Name     │ Meta-Llama-3.1-8B-Instruct │
│ Model Status   │ PENDING                    │
│ Pending Reason │ Resources                  │
│ Base URL       │ UNAVAILABLE                │
└────────────────┴────────────────────────────┘
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ Job Status     ┃ Value       ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ Model Name     │ Qwen3-14B   │
│ Model Status   │ PENDING     │
│ Pending Reason │ Resources   │
│ Base URL       │ UNAVAILABLE │
└────────────────┴─────────────┘
```
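
If you are scripting around a pending launch, you can poll until the job leaves the `PENDING` state. Here is a sketch under the same assumptions as the client example above; the job ID is taken from the sample output:

```python
import time

from vec_inf.client import VecInfClient  # import path assumed, as above

# Poll a job until it becomes READY; raise if the launch fails.
client = VecInfClient()
job_id = "1435035+1"
while True:
    status = client.get_status(job_id)
    if status.server_status == "READY":
        print(f"Server ready at {status.base_url}")
        break
    if status.server_status == "FAILED":
        raise RuntimeError(f"Launch failed: {status.failed_reason}")
    print(f"Still {status.server_status} ({status.pending_reason})")
    time.sleep(30)
```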

When the server is ready, you should see an output like this:

```
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━
┃ Job Status ┃ Value
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ Model Name │ Meta-Llama-3.1-8B-Instruct
│ Model Status │ READY
│ Base URL │ http://gpu042:8080/v1
└──────────────┴────────────────────────────
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Job Status   ┃ Value                 ┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
│ Model Name   │ Qwen3-14B             │
│ Model Status │ READY                 │
│ Base URL     │ http://gpu105:8080/v1 │
└──────────────┴───────────────────────┘
```
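
Once the server reports `READY`, the base URL points at an OpenAI-compatible endpoint, so any OpenAI client can talk to it. Below is a hedged example using the `openai` Python package, with the model name and URL taken from the sample output above; the API key is a placeholder, since these servers typically do not validate it:

```python
from openai import OpenAI

# Point an OpenAI-compatible client at the inference server.
client = OpenAI(base_url="http://gpu105:8080/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="Qwen3-14B",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```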

There are 5 possible states:
@@ -190,7 +207,7 @@ There are 5 possible states:

**Note**
* The base URL is only available when the model is in the `READY` state.
* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 17480109+0, 17480109+1).
* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 1435035+0, 1435035+1).

### `metrics` command

107 changes: 106 additions & 1 deletion tests/vec_inf/cli/test_cli.py
@@ -135,7 +135,7 @@ def test_list_single_model(runner):


def test_status_command(runner):
"""Test status command."""
"""Test status command with job ID argument."""
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
@@ -154,6 +154,111 @@ def test_status_command(runner):
assert "Meta-Llama-3.1-8B" in result.output


def test_status_command_no_job_id_no_running_jobs(runner):
    """Test status command with no argument when no jobs are running."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
        mock_client = MagicMock()
        mock_client_class.return_value = mock_client
        mock_client.fetch_running_jobs.return_value = []

        result = runner.invoke(cli, ["status"])

        assert result.exit_code == 0
        assert "No running jobs found." in result.output


def test_status_command_no_job_id_single_running_job(runner):
    """Test status command with no argument when one job is running."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
        mock_client = MagicMock()
        mock_client_class.return_value = mock_client
        mock_client.fetch_running_jobs.return_value = ["12345"]

        mock_status = MagicMock()
        mock_status.model_name = "test-model-1"
        mock_status.server_status = "READY"
        mock_status.base_url = "http://localhost:8000"
        mock_status.pending_reason = None
        mock_status.failed_reason = None
        mock_client.get_status.return_value = mock_status

        result = runner.invoke(cli, ["status"])

        assert result.exit_code == 0
        assert "test-model-1" in result.output
        mock_client.fetch_running_jobs.assert_called_once()
        mock_client.get_status.assert_called_once_with("12345")


def test_status_command_no_job_id_multiple_running_jobs(runner):
    """Test status command with no argument when multiple jobs are running."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
        mock_client = MagicMock()
        mock_client_class.return_value = mock_client
        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]

        mock_status_1 = MagicMock()
        mock_status_1.model_name = "test-model-1"
        mock_status_1.server_status = "READY"
        mock_status_1.base_url = "http://localhost:8000"
        mock_status_1.pending_reason = None
        mock_status_1.failed_reason = None

        mock_status_2 = MagicMock()
        mock_status_2.model_name = "test-model-2"
        mock_status_2.server_status = "PENDING"
        mock_status_2.base_url = None
        mock_status_2.pending_reason = "Waiting for resources"
        mock_status_2.failed_reason = None

        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]

        result = runner.invoke(cli, ["status"])

        assert result.exit_code == 0
        assert "test-model-1" in result.output
        assert "test-model-2" in result.output
        assert "12345" in result.output
        assert "67890" in result.output
        mock_client.fetch_running_jobs.assert_called_once()
        assert mock_client.get_status.call_count == 2


def test_status_command_no_job_id_multiple_jobs_json_mode(runner):
    """Test status command with no argument and JSON mode for multiple jobs."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
        mock_client = MagicMock()
        mock_client_class.return_value = mock_client
        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]

        mock_status_1 = MagicMock()
        mock_status_1.model_name = "test-model-1"
        mock_status_1.server_status = "READY"
        mock_status_1.base_url = "http://localhost:8000"
        mock_status_1.pending_reason = None
        mock_status_1.failed_reason = None

        mock_status_2 = MagicMock()
        mock_status_2.model_name = "test-model-2"
        mock_status_2.server_status = "FAILED"
        mock_status_2.base_url = None
        mock_status_2.pending_reason = None
        mock_status_2.failed_reason = "Out of memory"

        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]

        result = runner.invoke(cli, ["status", "--json-mode"])

        assert result.exit_code == 0
        output = json.loads(result.output)
        assert isinstance(output, list)
        assert len(output) == 2
        assert output[0]["model_name"] == "test-model-1"
        assert output[0]["model_status"] == "READY"
        assert output[1]["model_name"] == "test-model-2"
        assert output[1]["model_status"] == "FAILED"


def test_shutdown_command(runner):
"""Test shutdown command."""
with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class: