From a864590aeff904bf02fac7782b323fa9f047fd2a Mon Sep 17 00:00:00 2001
From: Nigel Bosch <pnigelb@gmail.com>
Date: Tue, 28 Jan 2025 18:59:09 -0600
Subject: [PATCH 1/5] add /apply-template endpoint to server

---
 examples/server/server.cpp                       |  9 +++++++++
 .../server/tests/unit/test_chat_completion.py    | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index b1cde2d7f48dd..270ec90757560 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -4124,6 +4124,14 @@ int main(int argc, char ** argv) {
         res_ok(res, root);
     };
 
+    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
+        auto body = json::parse(req.body);
+        const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
+        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
+
+        res_ok(res, data);
+    };
+
     const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {
         handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE);
     };
@@ -4300,6 +4308,7 @@ int main(int argc, char ** argv) {
     svr->Post("/v1/reranking",        handle_rerank);
     svr->Post("/tokenize",            handle_tokenize);
     svr->Post("/detokenize",          handle_detokenize);
+    svr->Post("/apply-template",      handle_apply_template);
     // LoRA adapters hotswap
     svr->Get ("/lora-adapters",       handle_lora_adapters_list);
     svr->Post("/lora-adapters",       handle_lora_adapters_apply);
diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py
index 2e15348dceecb..9e780d6baec7f 100644
--- a/examples/server/tests/unit/test_chat_completion.py
+++ b/examples/server/tests/unit/test_chat_completion.py
@@ -121,6 +121,22 @@ def test_chat_template():
     assert res.body["__verbose"]["prompt"] == "<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
 
 
+def test_apply_chat_template():  # POST /apply-template with the "command-r" template and check the returned prompt
+    global server
+    server.chat_template = "command-r"
+    server.start()
+    res = server.make_request("POST", "/apply-template", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "system", "content": "You are a test."},
+            {"role": "user", "content":"Hi there"},
+        ]
+    })
+    assert res.status_code == 200
+    assert "prompt" in res.body  # the formatted conversation is expected under the "prompt" key
+    assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
+
+
 @pytest.mark.parametrize("response_format,n_predicted,re_content", [
     ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""),
     ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"),

From 10448bf9343d0b7c86c89f24865589c210b03781 Mon Sep 17 00:00:00 2001
From: Nigel Bosch <pnigelb@gmail.com>
Date: Wed, 29 Jan 2025 08:28:33 -0600
Subject: [PATCH 2/5] remove unnecessary line

---
 examples/server/tests/unit/test_chat_completion.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py
index 9e780d6baec7f..add3f810f5e99 100644
--- a/examples/server/tests/unit/test_chat_completion.py
+++ b/examples/server/tests/unit/test_chat_completion.py
@@ -126,7 +126,6 @@ def test_apply_chat_template():
     server.chat_template = "command-r"
     server.start()
     res = server.make_request("POST", "/apply-template", data={
-        "max_tokens": 8,
         "messages": [
             {"role": "system", "content": "You are a test."},
             {"role": "user", "content":"Hi there"},

From 453d204d8aaf97fcfd6fb6d641a9409c61e3f06f Mon Sep 17 00:00:00 2001
From: Nigel Bosch <pnigelb@gmail.com>
Date: Wed, 29 Jan 2025 08:28:52 -0600
Subject: [PATCH 3/5] add /apply-template documentation

---
 examples/server/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/server/README.md b/examples/server/README.md
index 5022de672d1f5..eef328ddc6831 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -572,6 +572,14 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k
 
 `tokens`: Set the tokens to detokenize.
 
+### POST `/apply-template`: Apply chat template to a conversation
+
+Applies the server's chat template to a list of chat messages, producing the single prompt string that a chat model expects as input, without performing inference. The formatted prompt is returned in the `prompt` field of the JSON response. It can then be modified as desired (for example, by inserting "Sure!" at the beginning of the model's response) before being sent to `/completion` to generate the chat response.
+
+*Options:*
+
+`messages`: (Required) Chat turns in the same format as `/v1/chat/completions`.
+
 ### POST `/embedding`: Generate embedding of a given text
 
 > [!IMPORTANT]

From b407a4e9a79e71a54063b8cae94f61ba282d6891 Mon Sep 17 00:00:00 2001
From: Nigel Bosch <pnigelb@gmail.com>
Date: Wed, 29 Jan 2025 10:25:11 -0600
Subject: [PATCH 4/5] return only "prompt" field in /apply-template

---
 examples/server/server.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 270ec90757560..6e79d424068de 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -4127,8 +4127,10 @@ int main(int argc, char ** argv) {
     const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
         const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
-        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
 
+        // format and return only the "prompt" field
+        json data = json::object();
+        data["prompt"] = oaicompat_completion_params_parse(body, chat_template, params.use_jinja)["prompt"];
         res_ok(res, data);
     };
 

From 6f29bcbdae6e11d4d8502bd3df6fdf3421413213 Mon Sep 17 00:00:00 2001
From: Nigel Bosch <pnigelb@gmail.com>
Date: Wed, 29 Jan 2025 10:29:03 -0600
Subject: [PATCH 5/5] use suggested idea instead of my overly verbose way

---
 examples/server/server.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6e79d424068de..f98fe85a4ebb1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -4127,11 +4127,9 @@ int main(int argc, char ** argv) {
     const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
         const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
+        json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); // full parsed params; only its "prompt" entry is used below
 
-        // format and return only the "prompt" field
-        json data = json::object();
-        data["prompt"] = oaicompat_completion_params_parse(body, chat_template, params.use_jinja)["prompt"];
-        res_ok(res, data);
+        res_ok(res, {{ "prompt", data.at("prompt") }}); // respond with a JSON object holding only the "prompt" field
     };
 
     const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {