From 40568090a1e69e3d05ab16aa4456e6fc817a74b3 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 4 Aug 2025 18:55:45 +0000 Subject: [PATCH 1/5] sad --- tests/serve/test_sglang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py index 891820eb6a..52ec88c58a 100644 --- a/tests/serve/test_sglang.py +++ b/tests/serve/test_sglang.py @@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess): def __init__(self, script_name, request): self.port = 8000 - sglang_dir = "/workspace/examples/sglang" + sglang_dir = "/workspace/components/backends/sglang" script_path = os.path.join(sglang_dir, "launch", script_name) # Verify script exists From e954bc3c5649eb57b8b44794c0ba8d7e6934fb6d Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 4 Aug 2025 19:02:01 +0000 Subject: [PATCH 2/5] test(tests/serve/test_sglang): add TODO for future HTTP endpoint tests --- tests/serve/test_sglang.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py index 52ec88c58a..2434a2bb1f 100644 --- a/tests/serve/test_sglang.py +++ b/tests/serve/test_sglang.py @@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services): timeout=120, ) + # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around + # flush_cache and expert distribution recording + assert response.status_code == 200 result = response.json() assert "choices" in result From 24fe09130751d0581da7219fb6320192af74c786 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 4 Aug 2025 19:26:48 +0000 Subject: [PATCH 3/5] bump --- .github/workflows/trigger_ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/trigger_ci.yml b/.github/workflows/trigger_ci.yml index d0d0d937a8..b9f8c40a80 100644 --- a/.github/workflows/trigger_ci.yml +++ b/.github/workflows/trigger_ci.yml @@ -65,6 +65,7 @@ jobs: - 'container/Dockerfile.sglang-deepep' - 'components/backends/sglang/**' - 'container/build.sh' + - 'tests/serve/test_sglang.py' - name: Check if Validation Workflow has run id: check_workflow uses: actions/github-script@v6 From 7eefff1949c4cbc19818175496d318f8530b4886 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 4 Aug 2025 19:28:13 +0000 Subject: [PATCH 4/5] bump --- tests/serve/test_sglang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py index 2434a2bb1f..554af06203 100644 --- a/tests/serve/test_sglang.py +++ b/tests/serve/test_sglang.py @@ -166,7 +166,7 @@ def test_sglang_disagg_dp_attention(request, runtime_services): timeout=120, ) - # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around + # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around # flush_cache and expert distribution recording assert response.status_code == 200 From 56cf1a25c5eb83bede33693a49a24723bb674559 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Mon, 4 Aug 2025 20:16:22 +0000 Subject: [PATCH 5/5] docs(sglang): update multinode examples to reflect new model path and run command --- components/backends/sglang/docs/multinode-examples.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/backends/sglang/docs/multinode-examples.md b/components/backends/sglang/docs/multinode-examples.md index 2bc0a802ff..d6ae5e32e0 100644 --- a/components/backends/sglang/docs/multinode-examples.md +++ b/components/backends/sglang/docs/multinode-examples.md @@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init- Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker ```bash # run ingress -dynamo run in=http out=dyn & +python3 -m dynamo.frontend --http-port=8000 & # run prefill worker python3 -m dynamo.sglang.worker \ --model-path /model/ \ @@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "model": "deepseek-ai/DeepSeek-R1", "messages": [ { "role": "user",