From eab798abe5778340ef02bb302ba45f8b8a058e4e Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 18 Jul 2024 10:35:30 +0700 Subject: [PATCH 1/3] feat: support running multiple engines at the same time --- cortex-cpp/controllers/server.cc | 68 +++++++++++++++++++------------- cortex-cpp/controllers/server.h | 4 +- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 3c3ac0e9f..6b639b736 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -32,6 +32,10 @@ server::~server() {} void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { + if(!HasFieldInReq(req, callback, "engine")) { + return; + } + auto engine_type = (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { @@ -91,6 +95,10 @@ void server::Embedding(const HttpRequestPtr& req, void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { + if(!HasFieldInReq(req, callback, "engine")) { + return; + } + auto engine_type = (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { @@ -118,6 +126,10 @@ void server::UnloadModel( void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { + if(!HasFieldInReq(req, callback, "engine")) { + return; + } + auto engine_type = (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { @@ -145,7 +157,7 @@ void server::ModelStatus( void server::GetModels(const HttpRequestPtr& req, std::function&& callback) { - if (!IsEngineLoaded(cur_engine_type_)) { + if (engines_.empty()) { Json::Value res; res["message"] = "Engine is not loaded yet"; auto resp = cortex_utils::nitroHttpJsonResponse(res); @@ -156,24 +168,22 @@ void server::GetModels(const HttpRequestPtr& req, } LOG_TRACE << "Start to get models"; - auto& en = std::get(engines_[cur_engine_type_].engine); - if (en->IsSupported("GetModels")) { - en->GetModels( - req->getJsonObject(), - [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode(static_cast( - status["status_code"].asInt())); - cb(resp); - }); - } else { - Json::Value res; - res["message"] = "Method is not supported yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); - resp->setStatusCode(k500InternalServerError); - callback(resp); - LOG_WARN << "Method is not supported yet"; + Json::Value resp_data(Json::arrayValue); + for (auto const& [k, v] : engines_) { + auto e = std::get(v.engine); + if (e->IsSupported("GetModels")) { + e->GetModels(req->getJsonObject(), + [&resp_data](Json::Value status, Json::Value res) { + resp_data.append(res); + }); + } } + Json::Value root; + root["data"] = resp_data; + root["object"] = "list"; + auto resp = cortex_utils::nitroHttpJsonResponse(root); + resp->setStatusCode(drogon::HttpStatusCode::k200OK); + callback(resp); LOG_TRACE << "Done get models"; } @@ -259,8 +269,6 @@ void server::LoadModel(const HttpRequestPtr& req, // We have not loaded engine yet, should load it before using it if (engines_.find(engine_type) == engines_.end()) { - // We only use single engine so unload all engines before load new engine - UnloadEngines(); auto get_engine_path = [](std::string_view e) { if (e == kLlamaEngine) { return cortex_utils::kLlamaLibPath; @@ -363,14 +371,20 @@ bool server::IsEngineLoaded(const std::string& e) { return engines_.find(e) != engines_.end(); } -void server::UnloadEngines() { - // We unload all engines except python engine - for (auto it = engines_.begin(); it != engines_.end();) { - if (it->first != kPythonRuntimeEngine) { - it = engines_.erase(it); - } else - it++; +bool server::HasFieldInReq( + const HttpRequestPtr& req, + std::function& callback, + const std::string& field) { + if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) { + Json::Value res; + res["message"] = "No " + field + " field in request body"; + auto resp = cortex_utils::nitroHttpJsonResponse(res); + resp->setStatusCode(k409Conflict); + callback(resp); + LOG_WARN << "No " << field << " field in request body"; + return false; } + return true; } } // namespace inferences diff --git a/cortex-cpp/controllers/server.h b/cortex-cpp/controllers/server.h index 87f544f27..a25288612 100644 --- a/cortex-cpp/controllers/server.h +++ b/cortex-cpp/controllers/server.h @@ -99,7 +99,9 @@ class server : public drogon::HttpController, SyncQueue& q); bool IsEngineLoaded(const std::string& e); - void UnloadEngines(); + bool HasFieldInReq(const HttpRequestPtr& req, + std::function& callback, + const std::string& field); private: struct SyncQueue { From 6de90a9da14d82aeb3d942c80be44a022fa47d44 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 18 Jul 2024 10:41:42 +0700 Subject: [PATCH 2/3] refactor: rename functions --- cortex-cpp/controllers/health.cc | 2 +- cortex-cpp/controllers/server.cc | 34 ++++++++++++++++---------------- cortex-cpp/utils/cortex_utils.h | 6 +++--- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cortex-cpp/controllers/health.cc b/cortex-cpp/controllers/health.cc index db4df4b22..22fc0bfd6 100644 --- a/cortex-cpp/controllers/health.cc +++ b/cortex-cpp/controllers/health.cc @@ -4,7 +4,7 @@ void health::asyncHandleHttpRequest( const HttpRequestPtr &req, std::function &&callback) { - auto resp = cortex_utils::nitroHttpResponse(); + auto resp = cortex_utils::CreateCortexHttpResponse(); resp->setStatusCode(k200OK); resp->setContentTypeCode(CT_TEXT_HTML); resp->setBody("cortex-cpp is alive!!!"); diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 6b639b736..8429923b1 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -41,7 +41,7 @@ void server::ChatCompletion( if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -73,7 +73,7 @@ void server::Embedding(const HttpRequestPtr& req, if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -104,7 +104,7 @@ void server::UnloadModel( if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -115,7 +115,7 @@ void server::UnloadModel( ->UnloadModel( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(static_cast( status["status_code"].asInt())); cb(resp); @@ -135,7 +135,7 @@ void server::ModelStatus( if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -147,7 +147,7 @@ void server::ModelStatus( ->GetModelStatus( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(static_cast( status["status_code"].asInt())); cb(resp); @@ -160,7 +160,7 @@ void server::GetModels(const HttpRequestPtr& req, if (engines_.empty()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -181,7 +181,7 @@ void server::GetModels(const HttpRequestPtr& req, Json::Value root; root["data"] = resp_data; root["object"] = "list"; - auto resp = cortex_utils::nitroHttpJsonResponse(root); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(root); resp->setStatusCode(drogon::HttpStatusCode::k200OK); callback(resp); @@ -203,7 +203,7 @@ void server::GetEngines( res["object"] = "list"; res["data"] = engine_array; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); callback(resp); } @@ -228,7 +228,7 @@ void server::FineTuning( Json::Value res; res["message"] = "Could not load engine " + engine_type; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); return; @@ -246,7 +246,7 @@ void server::FineTuning( en->HandlePythonFileExecutionRequest( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(static_cast( status["status_code"].asInt())); cb(resp); @@ -254,7 +254,7 @@ void server::FineTuning( } else { Json::Value res; res["message"] = "Method is not supported yet"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); LOG_WARN << "Method is not supported yet"; @@ -299,7 +299,7 @@ void server::LoadModel(const HttpRequestPtr& req, Json::Value res; res["message"] = "Could not load engine " + engine_type; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); return; @@ -316,7 +316,7 @@ void server::LoadModel(const HttpRequestPtr& req, auto& en = std::get(engines_[engine_type].engine); en->LoadModel(req->getJsonObject(), [cb = std::move(callback)]( Json::Value status, Json::Value res) { - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -353,7 +353,7 @@ void server::ProcessStreamRes(std::function cb, return n; }; - auto resp = cortex_utils::nitroStreamResponse(chunked_content_provider, + auto resp = cortex_utils::CreateCortexStreamResponse(chunked_content_provider, "chat_completions.txt"); cb(resp); } @@ -361,7 +361,7 @@ void server::ProcessStreamRes(std::function cb, void server::ProcessNonStreamRes(std::function cb, SyncQueue& q) { auto [status, res] = q.wait_and_pop(); - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -378,7 +378,7 @@ bool server::HasFieldInReq( if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) { Json::Value res; res["message"] = "No " + field + " field in request body"; - auto resp = cortex_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "No " << field << " field in request body"; diff --git a/cortex-cpp/utils/cortex_utils.h b/cortex-cpp/utils/cortex_utils.h index c4319ca57..6f8a89658 100644 --- a/cortex-cpp/utils/cortex_utils.h +++ b/cortex-cpp/utils/cortex_utils.h @@ -258,7 +258,7 @@ inline void nitro_logo() { std::cout << resetColor; // Reset color at the endreturn; } -inline drogon::HttpResponsePtr nitroHttpResponse() { +inline drogon::HttpResponsePtr CreateCortexHttpResponse() { auto resp = drogon::HttpResponse::newHttpResponse(); #ifdef ALLOW_ALL_CORS LOG_INFO << "Respond for all cors!"; @@ -267,7 +267,7 @@ inline drogon::HttpResponsePtr nitroHttpResponse() { return resp; } -inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value& data) { +inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(const Json::Value& data) { auto resp = drogon::HttpResponse::newHttpJsonResponse(data); #ifdef ALLOW_ALL_CORS LOG_INFO << "Respond for all cors!"; @@ -277,7 +277,7 @@ inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value& data) { return resp; }; -inline drogon::HttpResponsePtr nitroStreamResponse( +inline drogon::HttpResponsePtr CreateCortexStreamResponse( const std::function& callback, const std::string& attachmentFileName = "") { auto resp = drogon::HttpResponse::newStreamResponse( From 26a4768895e55485c9e254f252c0ae384a5c97c9 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 18 Jul 2024 11:06:32 +0700 Subject: [PATCH 3/3] fix: append models to list --- cortex-cpp/controllers/server.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 8429923b1..485695b66 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -32,7 +32,7 @@ server::~server() {} void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { - if(!HasFieldInReq(req, callback, "engine")) { + if (!HasFieldInReq(req, callback, "engine")) { return; } @@ -95,7 +95,7 @@ void server::Embedding(const HttpRequestPtr& req, void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { - if(!HasFieldInReq(req, callback, "engine")) { + if (!HasFieldInReq(req, callback, "engine")) { return; } @@ -126,7 +126,7 @@ void server::UnloadModel( void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { - if(!HasFieldInReq(req, callback, "engine")) { + if (!HasFieldInReq(req, callback, "engine")) { return; } @@ -174,7 +174,9 @@ void server::GetModels(const HttpRequestPtr& req, if (e->IsSupported("GetModels")) { e->GetModels(req->getJsonObject(), [&resp_data](Json::Value status, Json::Value res) { - resp_data.append(res); + for (auto r : res["data"]) { + resp_data.append(r); + } }); } } @@ -354,7 +356,7 @@ void server::ProcessStreamRes(std::function cb, }; auto resp = cortex_utils::CreateCortexStreamResponse(chunked_content_provider, - "chat_completions.txt"); + "chat_completions.txt"); cb(resp); }