diff --git a/.github/workflows/trigger_ci.yml b/.github/workflows/trigger_ci.yml index d0d0d937a8..b9f8c40a80 100644 --- a/.github/workflows/trigger_ci.yml +++ b/.github/workflows/trigger_ci.yml @@ -65,6 +65,7 @@ jobs: - 'container/Dockerfile.sglang-deepep' - 'components/backends/sglang/**' - 'container/build.sh' + - 'tests/serve/test_sglang.py' - name: Check if Validation Workflow has run id: check_workflow uses: actions/github-script@v6 diff --git a/components/backends/sglang/docs/multinode-examples.md b/components/backends/sglang/docs/multinode-examples.md index 2bc0a802ff..d6ae5e32e0 100644 --- a/components/backends/sglang/docs/multinode-examples.md +++ b/components/backends/sglang/docs/multinode-examples.md @@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init- Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker ```bash # run ingress -dynamo run in=http out=dyn & +python3 -m dynamo.frontend --http-port=8000 & # run prefill worker python3 -m dynamo.sglang.worker \ --model-path /model/ \ @@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "model": "deepseek-ai/DeepSeek-R1", "messages": [ { "role": "user", diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py index 891820eb6a..554af06203 100644 --- a/tests/serve/test_sglang.py +++ b/tests/serve/test_sglang.py @@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess): def __init__(self, script_name, request): self.port = 8000 - sglang_dir = "/workspace/examples/sglang" + sglang_dir = "/workspace/components/backends/sglang" script_path = os.path.join(sglang_dir, "launch", script_name) # Verify script exists @@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services): timeout=120, ) + # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around + # flush_cache and expert distribution recording + assert response.status_code == 200 result = response.json() assert "choices" in result