From 5d00c8e9be6abb6c33cb1a0a805feceecb0d2ddf Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 16:03:52 +0700
Subject: [PATCH 1/6] implement abstract base class for llamaCPP handler

---
 controllers/llamaCPP.h | 47 ++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index adc179489..01d1a09f4 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -24,6 +24,7 @@
 #define CPPHTTPLIB_NO_EXCEPTIONS 1
 #endif
 
+#include "common/base.h"
 #include "utils/json.hpp"
 
 // auto generated files (update with ./deps.sh)
@@ -2510,26 +2511,20 @@ append_to_generated_text_from_generated_token_probs(llama_server_context &llama,
 using namespace drogon;
 
 namespace inferences {
-class llamaCPP : public drogon::HttpController<llamaCPP> {
+class llamaCPP : public drogon::HttpController<llamaCPP>, public ChatProvider {
 public:
-  llamaCPP() {
-    // Some default values for now below
-    log_disable(); // Disable the log to file feature, reduce bloat for
-                   // target
-                   // system ()
-  }
-
-  ~llamaCPP() { stopBackgroundTask(); }
+  llamaCPP();
+  ~llamaCPP();
 
   METHOD_LIST_BEGIN
   // list path definitions here;
-  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
+  METHOD_ADD(llamaCPP::inference, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
   METHOD_ADD(llamaCPP::unloadModel, "unloadmodel", Get);
   METHOD_ADD(llamaCPP::modelStatus, "modelstatus", Get);
   // Openai compatible path
-  ADD_METHOD_TO(llamaCPP::chatCompletion, "/v1/chat/completions", Post);
+  ADD_METHOD_TO(llamaCPP::inference, "/v1/chat/completions", Post);
   ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options);
 
   ADD_METHOD_TO(llamaCPP::embedding, "/v1/embeddings", Post);
@@ -2537,18 +2532,21 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
 
-  void chatCompletion(const HttpRequestPtr &req,
-                      std::function<void(const HttpResponsePtr &)> &&callback);
-  void handlePrelight(const HttpRequestPtr &req,
-                      std::function<void(const HttpResponsePtr &)> &&callback);
-  void embedding(const HttpRequestPtr &req,
-                 std::function<void(const HttpResponsePtr &)> &&callback);
-  void loadModel(const HttpRequestPtr &req,
-                 std::function<void(const HttpResponsePtr &)> &&callback);
-  void unloadModel(const HttpRequestPtr &req,
-                   std::function<void(const HttpResponsePtr &)> &&callback);
-  void modelStatus(const HttpRequestPtr &req,
-                   std::function<void(const HttpResponsePtr &)> &&callback);
+  void
+  inference(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  embedding(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  loadModel(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  unloadModel(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  modelStatus(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) override;
 
 private:
   llama_server_context llama;
@@ -2569,8 +2567,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string grammar_file_content;
 
   bool loadModelImpl(std::shared_ptr<Json::Value> jsonBody);
-  void
-  chatCompletionImpl(std::shared_ptr<Json::Value> jsonBody,
+  void inferenceImpl(std::shared_ptr<Json::Value> jsonBody,
                      std::function<void(const HttpResponsePtr &)> &callback);
   void embeddingImpl(std::shared_ptr<Json::Value> jsonBody,
                      std::function<void(const HttpResponsePtr &)> &callback);
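Patch 1 replaces the concrete handler declarations with "override" declarations against the ChatProvider interface (which patch 3 below introduces). The practical effect of "override" here: if a handler's signature ever drifts from the base-class declaration, the build fails at the declaration site instead of silently producing an unrelated overload. A minimal self-contained illustration of that failure mode, with hypothetical names that are not from this codebase:

#include <functional>

struct Provider {
  virtual ~Provider() = default;
  virtual void inference(int request_id, std::function<void(int)> &&cb) = 0;
};

struct GoodHandler : Provider {
  // Matches the base declaration exactly, so 'override' compiles.
  void inference(int request_id, std::function<void(int)> &&cb) override {
    cb(request_id);
  }
};

struct BadHandler : Provider {
  // Uncommenting this fails to compile: the parameter type differs, so it
  // overrides nothing. Without 'override' it would be a silent new overload.
  // void inference(long request_id, std::function<void(int)> &&cb) override;
};

int main() {
  GoodHandler h;
  h.inference(42, [](int id) { (void)id; });
}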
From 7da4bd2ac9cd2ac83aeb3cb7b75de3b5cc9d8544 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 16:04:10 +0700
Subject: [PATCH 2/6] change the implementations accordingly with new llamaCPP.h

---
 controllers/llamaCPP.cc | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index ea207a220..8e2f2f22c 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -132,6 +132,15 @@ std::string create_return_json(const std::string &id, const std::string &model,
   return Json::writeString(writer, root);
 }
 
+llamaCPP::llamaCPP() {
+  // Some default values for now below
+  log_disable(); // Disable the log to file feature, reduce bloat for
+                 // target
+                 // system ()
+};
+
+llamaCPP::~llamaCPP() { stopBackgroundTask(); }
+
 void llamaCPP::warmupModel() {
   json pseudo;
 
@@ -148,18 +157,7 @@ void llamaCPP::warmupModel() {
   return;
 }
 
-void llamaCPP::handlePrelight(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-  auto resp = drogon::HttpResponse::newHttpResponse();
-  resp->setStatusCode(drogon::HttpStatusCode::k200OK);
-  resp->addHeader("Access-Control-Allow-Origin", "*");
-  resp->addHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
-  resp->addHeader("Access-Control-Allow-Headers", "*");
-  callback(resp);
-}
-
-void llamaCPP::chatCompletion(
+void llamaCPP::inference(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
 
@@ -167,10 +165,10 @@
 
   // Check if model is loaded
   checkModelLoaded(callback);
 
-  chatCompletionImpl(jsonBody, callback);
+  inferenceImpl(jsonBody, callback);
 }
 
-void llamaCPP::chatCompletionImpl(
+void llamaCPP::inferenceImpl(
     std::shared_ptr<Json::Value> jsonBody,
     std::function<void(const HttpResponsePtr &)> &callback) {

From 86307796228d61fad17e43a6102533699a6251a1 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 16:04:27 +0700
Subject: [PATCH 3/6] add base class for inference provider

---
 common/base.cc | 12 ++++++++++++
 common/base.h  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 common/base.cc
 create mode 100644 common/base.h

diff --git a/common/base.cc b/common/base.cc
new file mode 100644
index 000000000..4d34c9a17
--- /dev/null
+++ b/common/base.cc
@@ -0,0 +1,12 @@
+#include "base.h"
+
+void BaseProvider::handlePrelight(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+  auto resp = drogon::HttpResponse::newHttpResponse();
+  resp->setStatusCode(drogon::HttpStatusCode::k200OK);
+  resp->addHeader("Access-Control-Allow-Origin", "*");
+  resp->addHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
+  resp->addHeader("Access-Control-Allow-Headers", "*");
+  callback(resp);
+}
diff --git a/common/base.h b/common/base.h
new file mode 100644
index 000000000..d6531022d
--- /dev/null
+++ b/common/base.h
@@ -0,0 +1,46 @@
+#pragma once
+#include <drogon/HttpController.h>
+
+using namespace drogon;
+
+#pragma once
+#include <drogon/HttpController.h>
+
+using namespace drogon;
+
+class BaseProvider {
+public:
+  virtual ~BaseProvider() {}
+
+  // General inference method
+  virtual void
+  inference(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) = 0;
+
+  // Model management
+  virtual void
+  loadModel(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) = 0;
+  virtual void
+  unloadModel(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) = 0;
+  virtual void
+  modelStatus(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) = 0;
+
+  // Additional methods
+  void handlePrelight(const HttpRequestPtr &req,
+                      std::function<void(const HttpResponsePtr &)> &&callback);
+};
+
+class ChatProvider : public BaseProvider {
+public:
+  virtual ~ChatProvider() {}
+
+  // Implement embedding functionality specific to chat
+  virtual void
+  embedding(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) = 0;
+
+  // The derived class can also override other methods if needed
+};
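The point of the BaseProvider/ChatProvider split in patch 3 is that llamaCPP is no longer the only possible shape for a backend: anything implementing the pure virtuals can be wired up the same way. A hypothetical second backend, sketched only to show how common/base.h is meant to be consumed (StubProvider and its bodies are illustrative, not part of this series):

#include <functional>
#include <string>

#include "common/base.h"

class StubProvider : public ChatProvider {
public:
  void inference(const HttpRequestPtr & /*req*/,
                 std::function<void(const HttpResponsePtr &)> &&callback) override {
    reply("inference stub", callback);
  }
  void embedding(const HttpRequestPtr & /*req*/,
                 std::function<void(const HttpResponsePtr &)> &&callback) override {
    reply("embedding stub", callback);
  }
  void loadModel(const HttpRequestPtr & /*req*/,
                 std::function<void(const HttpResponsePtr &)> &&callback) override {
    reply("loadModel stub", callback);
  }
  void unloadModel(const HttpRequestPtr & /*req*/,
                   std::function<void(const HttpResponsePtr &)> &&callback) override {
    reply("unloadModel stub", callback);
  }
  void modelStatus(const HttpRequestPtr & /*req*/,
                   std::function<void(const HttpResponsePtr &)> &&callback) override {
    reply("modelStatus stub", callback);
  }

private:
  // Shared helper: every stub endpoint answers 200 OK with a fixed body.
  static void reply(const std::string &body,
                    const std::function<void(const HttpResponsePtr &)> &callback) {
    auto resp = drogon::HttpResponse::newHttpResponse();
    resp->setBody(body);
    callback(resp);
  }
};

Because every method is implemented, StubProvider is instantiable; leaving any pure virtual unimplemented keeps the class abstract, which is how the interface enforces completeness on new backends.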
From bb7dedd5da20a6f4907d7fa842597c0f22060857 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 16:05:34 +0700
Subject: [PATCH 4/6] add common to store reusable classes abstract classes etc

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 78443b585..87866c345 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,6 +76,7 @@ else()
 endif()
 
 aux_source_directory(controllers CTL_SRC)
+aux_source_directory(common COMMON_SRC)
 # aux_source_directory(filters FILTER_SRC) aux_source_directory(plugins
 # PLUGIN_SRC) aux_source_directory(models MODEL_SRC)
 
@@ -86,7 +87,7 @@ aux_source_directory(controllers CTL_SRC)
 target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 # ${CMAKE_CURRENT_SOURCE_DIR}/models)
 
-target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC})
+target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC})
 # ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC})
 # ##############################################################################
 # uncomment the following line for dynamically loading views set_property(TARGET

From 6e8007adb25b2f38c0fa843cd2732bd7243ba53f Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 16:38:18 +0700
Subject: [PATCH 5/6] attempt fix build error

---
 controllers/llamaCPP.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 01d1a09f4..ab6c82067 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -2511,7 +2511,7 @@ append_to_generated_text_from_generated_token_probs(llama_server_context &llama,
 using namespace drogon;
 
 namespace inferences {
-class llamaCPP : public drogon::HttpController<llamaCPP>, public ChatProvider {
+class llamaCPP : public ChatProvider, public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP();
   ~llamaCPP();
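Patch 5's only change is the order of the two base classes, and patch 6 below swaps it back. The commit message does not record the exact build error, but base-class order in C++ is not cosmetic: base subobjects are constructed left to right in declaration order, and the order also fixes object layout. A standalone illustration of the construction-order rule:

#include <iostream>

struct Controller {
  Controller() { std::cout << "Controller constructed\n"; }
};

struct Provider {
  Provider() { std::cout << "Provider constructed\n"; }
};

// Bases are constructed in declaration order, left to right,
// regardless of how a member-initializer list is written.
struct HandlerA : Controller, Provider {};
struct HandlerB : Provider, Controller {};

int main() {
  HandlerA a; // prints: Controller constructed, then Provider constructed
  HandlerB b; // prints: Provider constructed, then Controller constructed
}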
From 571f1cad6f674c611a0a1ee9643edc415232b410 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 6 Feb 2024 19:10:34 +0700
Subject: [PATCH 6/6] remove prelights due to issues

---
 common/base.cc         | 12 ------------
 common/base.h          |  4 ----
 controllers/llamaCPP.h |  6 +++---
 3 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/common/base.cc b/common/base.cc
index 4d34c9a17..e69de29bb 100644
--- a/common/base.cc
+++ b/common/base.cc
@@ -1,12 +0,0 @@
-#include "base.h"
-
-void BaseProvider::handlePrelight(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-  auto resp = drogon::HttpResponse::newHttpResponse();
-  resp->setStatusCode(drogon::HttpStatusCode::k200OK);
-  resp->addHeader("Access-Control-Allow-Origin", "*");
-  resp->addHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
-  resp->addHeader("Access-Control-Allow-Headers", "*");
-  callback(resp);
-}
diff --git a/common/base.h b/common/base.h
index d6531022d..6ae3c82eb 100644
--- a/common/base.h
+++ b/common/base.h
@@ -27,10 +27,6 @@ class BaseProvider {
   virtual void
   modelStatus(const HttpRequestPtr &req,
               std::function<void(const HttpResponsePtr &)> &&callback) = 0;
-
-  // Additional methods
-  void handlePrelight(const HttpRequestPtr &req,
-                      std::function<void(const HttpResponsePtr &)> &&callback);
 };
 
 class ChatProvider : public BaseProvider {
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index ab6c82067..82704a613 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -2511,7 +2511,7 @@ append_to_generated_text_from_generated_token_probs(llama_server_context &llama,
 using namespace drogon;
 
 namespace inferences {
-class llamaCPP : public ChatProvider, public drogon::HttpController<llamaCPP> {
+class llamaCPP : public drogon::HttpController<llamaCPP>, public ChatProvider {
 public:
   llamaCPP();
   ~llamaCPP();
@@ -2525,10 +2525,10 @@ class llamaCPP : public ChatProvider, public drogon::HttpController<llamaCPP> {
 
   // Openai compatible path
   ADD_METHOD_TO(llamaCPP::inference, "/v1/chat/completions", Post);
-  ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options);
+  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options); NOTE: prelight will be added back when browser support is properly planned
 
   ADD_METHOD_TO(llamaCPP::embedding, "/v1/embeddings", Post);
-  ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
+  //ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
 
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
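The series therefore lands with the OPTIONS routes commented out and common/base.cc emptied: per the in-code NOTE, the CORS preflight handling ("prelight" in this codebase) is deferred, not abandoned. For reference, a preflight is the OPTIONS request a browser sends before a cross-origin POST, and the deleted handler answered it permissively. A sketch of what restoring it could look like, mirroring the code removed from common/base.cc above (the free-function form here is illustrative, not the shape the series prescribes):

#include <functional>

#include <drogon/HttpRequest.h>
#include <drogon/HttpResponse.h>

using namespace drogon;

// Answers a CORS preflight with 200 OK and permissive Access-Control
// headers, exactly as the removed BaseProvider::handlePrelight did:
// any origin, POST/OPTIONS methods, any request headers.
void handlePrelight(const HttpRequestPtr &req,
                    std::function<void(const HttpResponsePtr &)> &&callback) {
  auto resp = drogon::HttpResponse::newHttpResponse();
  resp->setStatusCode(drogon::HttpStatusCode::k200OK);
  resp->addHeader("Access-Control-Allow-Origin", "*");
  resp->addHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
  resp->addHeader("Access-Control-Allow-Headers", "*");
  callback(resp);
}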