From 40568090a1e69e3d05ab16aa4456e6fc817a74b3 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Mon, 4 Aug 2025 18:55:45 +0000
Subject: [PATCH 1/5] test(tests/serve/test_sglang): update sglang_dir to
 components/backends/sglang

---
 tests/serve/test_sglang.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py
index 891820eb6a..52ec88c58a 100644
--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -28,7 +28,7 @@ class SGLangProcess(ManagedProcess):
 
     def __init__(self, script_name, request):
         self.port = 8000
-        sglang_dir = "/workspace/examples/sglang"
+        sglang_dir = "/workspace/components/backends/sglang"
         script_path = os.path.join(sglang_dir, "launch", script_name)
 
         # Verify script exists

From e954bc3c5649eb57b8b44794c0ba8d7e6934fb6d Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Mon, 4 Aug 2025 19:02:01 +0000
Subject: [PATCH 2/5] test(tests/serve/test_sglang): add TODO for future HTTP
 endpoint tests

---
 tests/serve/test_sglang.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py
index 52ec88c58a..2434a2bb1f 100644
--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -166,6 +166,9 @@ def test_sglang_disagg_dp_attention(request, runtime_services):
             timeout=120,
         )
 
+        # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around 
+        # flush_cache and expert distribution recording
+
         assert response.status_code == 200
         result = response.json()
         assert "choices" in result

From 24fe09130751d0581da7219fb6320192af74c786 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Mon, 4 Aug 2025 19:26:48 +0000
Subject: [PATCH 3/5] ci(trigger_ci): add tests/serve/test_sglang.py to the
 watched path list

---
 .github/workflows/trigger_ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/trigger_ci.yml b/.github/workflows/trigger_ci.yml
index d0d0d937a8..b9f8c40a80 100644
--- a/.github/workflows/trigger_ci.yml
+++ b/.github/workflows/trigger_ci.yml
@@ -65,6 +65,7 @@ jobs:
             - 'container/Dockerfile.sglang-deepep'
             - 'components/backends/sglang/**'
             - 'container/build.sh'
+            - 'tests/serve/test_sglang.py'
     - name: Check if Validation Workflow has run
       id: check_workflow
       uses: actions/github-script@v6

From 7eefff1949c4cbc19818175496d318f8530b4886 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Mon, 4 Aug 2025 19:28:13 +0000
Subject: [PATCH 4/5] style(tests/serve/test_sglang): remove trailing
 whitespace from TODO comment

---
 tests/serve/test_sglang.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py
index 2434a2bb1f..554af06203 100644
--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -166,7 +166,7 @@ def test_sglang_disagg_dp_attention(request, runtime_services):
             timeout=120,
         )
 
-        # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around 
+        # TODO: Once this is enabled, we can test out the rest of the HTTP endpoints around
         # flush_cache and expert distribution recording
 
         assert response.status_code == 200

From 56cf1a25c5eb83bede33693a49a24723bb674559 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Mon, 4 Aug 2025 20:16:22 +0000
Subject: [PATCH 5/5] docs(sglang): update multinode examples to reflect new
 model name and frontend run command

---
 components/backends/sglang/docs/multinode-examples.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/components/backends/sglang/docs/multinode-examples.md b/components/backends/sglang/docs/multinode-examples.md
index 2bc0a802ff..d6ae5e32e0 100644
--- a/components/backends/sglang/docs/multinode-examples.md
+++ b/components/backends/sglang/docs/multinode-examples.md
@@ -19,7 +19,7 @@ SGLang allows you to deploy multi-node sized models by adding in the `dist-init-
 Node 1: Run HTTP ingress, processor, and 8 shards of the prefill worker
 ```bash
 # run ingress
-dynamo run in=http out=dyn &
+python3 -m dynamo.frontend --http-port=8000 &
 # run prefill worker
 python3 -m dynamo.sglang.worker \
   --model-path /model/ \
@@ -102,7 +102,7 @@ SGLang typically requires a warmup period to ensure the DeepGEMM kernels are loa
 curl ${HEAD_PREFILL_NODE_IP}:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+    "model": "deepseek-ai/DeepSeek-R1",
     "messages": [
     {
         "role": "user",