From 627677ca88110d499bbea6bc9cc371ad59c6e0b2 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Wed, 8 Nov 2023 17:31:01 +0700
Subject: [PATCH 1/5] add conditional prompt for role

---
 controllers/llamaCPP.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 57e456768..d132c5e2d 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -94,7 +94,18 @@ void llamaCPP::chatCompletion(
 
   const Json::Value &messages = (*jsonBody)["messages"];
   for (const auto &message : messages) {
-    std::string role = message["role"].asString();
+    std::string input_role = message["role"].asString();
+    std::string role;
+    if (input_role == "user") {
+      role = user_prompt;
+    } else if (input_role == "assistant") {
+      role = ai_prompt;
+    } else if (input_role == "system") {
+      role = system_prompt;
+    } else {
+      role = input_role;
+    }
+
     std::string content = message["content"].asString();
     formatted_output += role + ": " + content + "\n";
   }

From 59acfda9ef99e6ec1b6502b40f516bb6bcdc9fe6 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Wed, 8 Nov 2023 17:31:21 +0700
Subject: [PATCH 2/5] add default prompt also init value for user ai and
 system prompt

---
 controllers/llamaCPP.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 192ec358f..74c1c894f 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -2142,5 +2142,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   size_t sent_count = 0;
   size_t sent_token_probs_index = 0;
   std::thread backgroundThread;
+  std::string user_prompt = "USER: ";
+  std::string ai_prompt = "ASSISTANT: ";
+  std::string system_prompt = "ASSISTANT's RULE: ";
 };
 }; // namespace inferences

From 6ceb4777487f05663414437f9fa3aab0e8e005c4 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Wed, 8 Nov 2023 17:40:38 +0700
Subject: [PATCH 3/5] to take user_prompt

---
 controllers/llamaCPP.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index d132c5e2d..18e6621b3 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -107,7 +107,7 @@ void llamaCPP::chatCompletion(
     }
 
     std::string content = message["content"].asString();
-    formatted_output += role + ": " + content + "\n";
+    formatted_output += role + content + "\n";
   }
 
   formatted_output += "assistant:";
@@ -116,8 +116,7 @@
       stopWords.push_back(stop_word.asString());
     }
     // specify default stop words
-    stopWords.push_back("user:");
-    stopWords.push_back("### USER:");
+    stopWords.push_back(user_prompt);
     data["stop"] = stopWords;
   }
 
@@ -224,6 +223,12 @@
 
     }
     params.cont_batching = (*jsonBody)["cont_batching"].asBool();
+
+    // Set up prompt
+    user_prompt = (*jsonBody)["user_prompt"].asString();
+    ai_prompt = (*jsonBody)["ai_prompt"].asString();
+    system_prompt = (*jsonBody)["system_prompt"].asString();
+
     // params.n_threads = (*jsonBody)["n_threads"].asInt();
     // params.n_threads_batch = params.n_threads;
   }

From 6ee413dbe74725b886c9ad88234a5c4a82f3d4d0 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Wed, 8 Nov 2023 18:18:47 +0700
Subject: [PATCH 4/5] better way to handler jsoncpp

---
 controllers/llamaCPP.cc | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 18e6621b3..07185f7e8 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -77,7 +77,8 @@ void llamaCPP::chatCompletion(
   const auto &jsonBody = req->getJsonObject();
 
   std::string formatted_output =
-      "Below is a conversation between an AI system named ASSISTANT and USER\n";
+      "Below is a conversation between an AI system named " + ai_prompt +
+      " and " + user_prompt + "\n";
   json data;
   json stopWords;
 
@@ -106,6 +107,7 @@ void llamaCPP::chatCompletion(
       role = input_role;
     }
 
+    LOG_INFO << "VALUE OF ROLE BEING USED:" << role;
     std::string content = message["content"].asString();
     formatted_output += role + content + "\n";
   }
@@ -212,25 +214,19 @@
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody)["ngl"].asInt();
-    params.n_ctx = (*jsonBody)["ctx_len"].asInt();
-    params.embedding = (*jsonBody)["embedding"].asBool();
+    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
+    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
+    params.embedding = (*jsonBody).get("embedding", true).asBool();
 
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    if ((*jsonBody).isMember("n_parallel")) {
-      params.n_parallel = (*jsonBody)["n_parallel"].asInt();
-    } else {
-      params.n_parallel = drogon_thread;
-    }
+
+    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
     params.cont_batching = (*jsonBody)["cont_batching"].asBool();
-
-    // Set up prompt
-    user_prompt = (*jsonBody)["user_prompt"].asString();
-    ai_prompt = (*jsonBody)["ai_prompt"].asString();
-    system_prompt = (*jsonBody)["system_prompt"].asString();
-
-    // params.n_threads = (*jsonBody)["n_threads"].asInt();
-    // params.n_threads_batch = params.n_threads;
+
+    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
+    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->system_prompt =
+        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";

From 2f141f4e36fdb7c9b8dc67e67a52599df13379d3 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Thu, 9 Nov 2023 08:42:12 +0700
Subject: [PATCH 5/5] feat: add pre-prompt for role

---
 controllers/llamaCPP.cc | 2 --
 controllers/llamaCPP.h  | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 07185f7e8..24b5c9718 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -106,8 +106,6 @@ void llamaCPP::chatCompletion(
     } else {
       role = input_role;
     }
-
-    LOG_INFO << "VALUE OF ROLE BEING USED:" << role;
     std::string content = message["content"].asString();
     formatted_output += role + content + "\n";
   }
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 74c1c894f..037cae926 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -2142,8 +2142,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   size_t sent_count = 0;
   size_t sent_token_probs_index = 0;
   std::thread backgroundThread;
-  std::string user_prompt = "USER: ";
-  std::string ai_prompt = "ASSISTANT: ";
-  std::string system_prompt = "ASSISTANT's RULE: ";
+  std::string user_prompt;
+  std::string ai_prompt;
+  std::string system_prompt;
 };
 }; // namespace inferences
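
Taken together, the series makes the chat template configurable end to end:
loadModel accepts user_prompt, ai_prompt, and system_prompt (falling back to
"USER: ", "ASSISTANT: ", and "ASSISTANT's RULE: " when a key is omitted), and
chatCompletion maps each message's role onto the configured prefix, which now
carries its own separator. An illustrative trace, with made-up message
contents: given the defaults and a chatCompletion body of

    {
      "messages": [
        {"role": "system", "content": "Answer briefly."},
        {"role": "user", "content": "Hello!"}
      ]
    }

formatted_output is built as

    Below is a conversation between an AI system named ASSISTANT:  and USER: 
    ASSISTANT's RULE: Answer briefly.
    USER: Hello!
    assistant:

(the colons and the doubled space in the first line come straight from
concatenating the prompt strings into the header). The configured user_prompt
is also pushed as a default stop word, so generation halts as soon as the
model tries to open a new user turn.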
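The cleanup in PATCH 4 leans on jsoncpp's Json::Value::get(key, default),
which returns the supplied default when the key is absent, so the isMember()
branching used for n_parallel collapses to a single line. A minimal standalone
sketch of the pattern (it assumes jsoncpp is installed; the file name, model
path, and override value are made up for illustration):

    // jsoncpp_defaults.cc -- build with: g++ jsoncpp_defaults.cc -ljsoncpp
    #include <json/json.h>
    #include <iostream>
    #include <string>

    int main() {
      Json::Value body; // stands in for the parsed loadModel request body
      body["llama_model_path"] = "/models/model.gguf"; // illustrative path
      body["user_prompt"] = "### USER: ";              // caller override

      // Key present: the caller's value wins.
      std::string user_prompt = body.get("user_prompt", "USER: ").asString();
      // Keys absent: the defaults win, with no isMember() branching.
      std::string ai_prompt = body.get("ai_prompt", "ASSISTANT: ").asString();
      int n_ctx = body.get("ctx_len", 2048).asInt();

      std::cout << user_prompt << "\n"; // prints "### USER: "
      std::cout << ai_prompt << "\n";   // prints "ASSISTANT: "
      std::cout << n_ctx << "\n";       // prints 2048
      return 0;
    }

One caveat: get() only substitutes the default when the key is missing; a
present but wrongly typed value still reaches asInt()/asString() and can
throw, so these defaults are a convenience rather than validation.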