ai-dynamo · ishandhanani · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025
@@ -65,6 +65,7 @@ jobs:
             - 'container/Dockerfile.sglang-deepep'
             - 'components/backends/sglang/**'
             - 'container/build.sh'
+            - 'tests/serve/test_sglang.py'
     - name: Check if Validation Workflow has run
       id: check_workflow
       uses: actions/github-script@v6

diff --git a/components/backends/sglang/docs/multinode-examples.md b/components/backends/sglang/docs/multinode-examples.md
@@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init-
 Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker
 ```bash
 # run ingress
-dynamo run in=http out=dyn &
+python3 -m dynamo.frontend --http-port=8000 &
 # run prefill worker
 python3 -m dynamo.sglang.worker \
   --model-path /model/ \
@@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa
 curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+    "model": "deepseek-ai/DeepSeek-R1",
     "messages": [
     {
         "role": "user",

@@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess):
 
     def __init__(self, script_name, request):
         self.port = 8000
-        sglang_dir = "/workspace/examples/sglang"
+        sglang_dir = "/workspace/components/backends/sglang"
         script_path = os.path.join(sglang_dir, "launch", script_name)
 
         # Verify script exists
@@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services):
             timeout=120,
         )
 
+        # TODO: Once this is enabled, also test the remaining HTTP endpoints
+        # (flush_cache and expert distribution recording).
+
         assert response.status_code == 200
         result = response.json()
         assert "choices" in result