From a864590aeff904bf02fac7782b323fa9f047fd2a Mon Sep 17 00:00:00 2001 From: Nigel Bosch Date: Tue, 28 Jan 2025 18:59:09 -0600 Subject: [PATCH 1/5] add /apply-template endpoint to server --- examples/server/server.cpp | 9 +++++++++ .../server/tests/unit/test_chat_completion.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b1cde2d7f48dd..270ec90757560 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -4124,6 +4124,14 @@ int main(int argc, char ** argv) { res_ok(res, root); }; + const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) { + auto body = json::parse(req.body); + const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; + json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); + + res_ok(res, data); + }; + const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) { handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE); }; @@ -4300,6 +4308,7 @@ int main(int argc, char ** argv) { svr->Post("/v1/reranking", handle_rerank); svr->Post("/tokenize", handle_tokenize); svr->Post("/detokenize", handle_detokenize); + svr->Post("/apply-template", handle_apply_template); // LoRA adapters hotswap svr->Get ("/lora-adapters", handle_lora_adapters_list); svr->Post("/lora-adapters", handle_lora_adapters_apply); diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 2e15348dceecb..9e780d6baec7f 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -121,6 +121,22 @@ def test_chat_template(): assert res.body["__verbose"]["prompt"] == " 
<|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" +def test_apply_chat_template(): + global server + server.chat_template = "command-r" + server.start() + res = server.make_request("POST", "/apply-template", data={ + "max_tokens": 8, + "messages": [ + {"role": "system", "content": "You are a test."}, + {"role": "user", "content":"Hi there"}, + ] + }) + assert res.status_code == 200 + assert "prompt" in res.body + assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + + @pytest.mark.parametrize("response_format,n_predicted,re_content", [ ({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""), ({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"), From 10448bf9343d0b7c86c89f24865589c210b03781 Mon Sep 17 00:00:00 2001 From: Nigel Bosch Date: Wed, 29 Jan 2025 08:28:33 -0600 Subject: [PATCH 2/5] remove unnecessary line --- examples/server/tests/unit/test_chat_completion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 9e780d6baec7f..add3f810f5e99 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -126,7 +126,6 @@ def test_apply_chat_template(): server.chat_template = "command-r" server.start() res = server.make_request("POST", "/apply-template", data={ - "max_tokens": 8, "messages": [ {"role": "system", "content": "You are a test."}, {"role": "user", "content":"Hi there"}, From 453d204d8aaf97fcfd6fb6d641a9409c61e3f06f Mon Sep 17 00:00:00 2001 From: Nigel Bosch Date: Wed, 29 Jan 2025 08:28:52 -0600 Subject: [PATCH 3/5] add /apply-template documentation 
--- examples/server/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/server/README.md b/examples/server/README.md index 5022de672d1f5..eef328ddc6831 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -572,6 +572,14 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k `tokens`: Set the tokens to detokenize. +### POST `/apply-template`: Apply chat template to a conversation + +Uses the server's prompt template formatting functionality to convert chat messages to a single string expected by a chat model as input, but does not perform inference. Instead, the prompt string is returned in the `prompt` field of the JSON response. The prompt can then be modified as desired (for example, to insert "Sure!" at the beginning of the model's response) before sending to `/completion` to generate the chat response. + +*Options:* + +`messages`: (Required) Chat turns in the same format as `/v1/chat/completions`. + ### POST `/embedding`: Generate embedding of a given text > [!IMPORTANT] From b407a4e9a79e71a54063b8cae94f61ba282d6891 Mon Sep 17 00:00:00 2001 From: Nigel Bosch Date: Wed, 29 Jan 2025 10:25:11 -0600 Subject: [PATCH 4/5] return only "prompt" field in /apply-template --- examples/server/server.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 270ec90757560..6e79d424068de 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -4127,8 +4127,10 @@ int main(int argc, char ** argv) { const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) { auto body = json::parse(req.body); const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? 
*ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; - json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); + // format and return only the "prompt" field + json data = json::object(); + data["prompt"] = oaicompat_completion_params_parse(body, chat_template, params.use_jinja)["prompt"]; res_ok(res, data); }; From 6f29bcbdae6e11d4d8502bd3df6fdf3421413213 Mon Sep 17 00:00:00 2001 From: Nigel Bosch Date: Wed, 29 Jan 2025 10:29:03 -0600 Subject: [PATCH 5/5] use suggested idea instead of my overly verbose way --- examples/server/server.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 6e79d424068de..f98fe85a4ebb1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -4127,11 +4127,9 @@ int main(int argc, char ** argv) { const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) { auto body = json::parse(req.body); const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; + json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); - // format and return only the "prompt" field - json data = json::object(); - data["prompt"] = oaicompat_completion_params_parse(body, chat_template, params.use_jinja)["prompt"]; - res_ok(res, data); + res_ok(res, {{ "prompt", data.at("prompt") }}); }; const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {