From e6a6b01e119d6e90958102ac759842f9bcae40a9 Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 21 Dec 2023 22:05:12 +0700 Subject: [PATCH 1/7] remove usage of drogon thread to infer n_parallel --- controllers/llamaCPP.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc index d30fd93b5..834c8ec45 100644 --- a/controllers/llamaCPP.cc +++ b/controllers/llamaCPP.cc @@ -434,8 +434,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) { gpt_params params; // By default will setting based on number of handlers - int drogon_thread = drogon::app().getThreadNum() - 5; - LOG_INFO << "Drogon thread is:" << drogon_thread; if (jsonBody) { if (!jsonBody["mmproj"].isNull()) { LOG_INFO << "MMPROJ FILE detected, multi-model enabled!"; @@ -447,7 +445,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) { params.embedding = jsonBody.get("embedding", true).asBool(); // Check if n_parallel exists in jsonBody, if not, set to drogon_thread params.n_batch = jsonBody.get("n_batch", 512).asInt(); - params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt(); + params.n_parallel = jsonBody.get("n_parallel", 1).asInt(); params.n_threads = jsonBody.get("cpu_threads", std::thread::hardware_concurrency()) .asInt(); From 0b4d08693d3977f7e1f7b5c07e4961e58cd0fb08 Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 21 Dec 2023 22:05:45 +0700 Subject: [PATCH 2/7] Always use the number of logical cores as threadpools --- main.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main.cc b/main.cc index 0c4534493..b2849f248 100644 --- a/main.cc +++ b/main.cc @@ -1,7 +1,9 @@ #include "utils/nitro_utils.h" -#include // for PATH_MAX +#include // Include the algorithm header +#include // for PATH_MAX #include #include +#include #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -35,11 +37,12 @@ int main(int argc, char *argv[]) { port = std::atoi(argv[3]); // Convert string argument to int } + int logical_cores = std::thread::hardware_concurrency(); nitro_utils::nitro_logo(); LOG_INFO << "Server started, listening at: " << host << ":" << port; LOG_INFO << "Please load your model"; drogon::app().addListener(host, port); - drogon::app().setThreadNum(thread_num + 5); + drogon::app().setThreadNum(std::max(thread_num, logical_cores)); LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); drogon::app().run(); From e6c33c486a669bfdf6f27019e87da75f4dacc11f Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 21 Dec 2023 22:07:52 +0700 Subject: [PATCH 3/7] remove redundant include --- main.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/main.cc b/main.cc index b2849f248..5b16f98de 100644 --- a/main.cc +++ b/main.cc @@ -1,9 +1,7 @@ #include "utils/nitro_utils.h" -#include // Include the algorithm header #include // for PATH_MAX #include #include -#include #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() From 3058054ce0681fb1a4cdee424f923a94ba17af76 Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 21 Dec 2023 22:08:19 +0700 Subject: [PATCH 4/7] remove redundant include --- main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cc b/main.cc index 5b16f98de..f0bcb7fb8 100644 --- a/main.cc +++ b/main.cc @@ -1,5 +1,5 @@ #include "utils/nitro_utils.h" -#include // for PATH_MAX +#include // for PATH_MAX #include #include From bf4025b0414d227dd4b4e8b12861ba9cec620846 Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 21 Dec 2023 22:27:26 +0700 Subject: [PATCH 5/7] set thread num --- main.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.cc b/main.cc index f0bcb7fb8..254a56fbb 100644 --- a/main.cc +++ b/main.cc @@ -36,11 +36,12 @@ int main(int argc, char *argv[]) { } int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); nitro_utils::nitro_logo(); LOG_INFO << "Server started, listening at: " << host << ":" << port; LOG_INFO << "Please load your model"; drogon::app().addListener(host, port); - drogon::app().setThreadNum(std::max(thread_num, logical_cores)); + drogon::app().setThreadNum(drogon_thread_num); LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); drogon::app().run(); From fee822f190bf6d9a9a4a81802b57e9eff8e4f838 Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 22 Dec 2023 07:35:02 +0700 Subject: [PATCH 6/7] add algorithm for compatibility fix possibly --- main.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/main.cc b/main.cc index 254a56fbb..21c2388ec 100644 --- a/main.cc +++ b/main.cc @@ -2,6 +2,7 @@ #include // for PATH_MAX #include #include +#include #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() From 97df4b9ecfbee7495c1926513e1b7dedea97fe8e Mon Sep 17 00:00:00 2001 From: tikikun Date: Fri, 22 Dec 2023 07:56:20 +0700 Subject: [PATCH 7/7] try fix --- main.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.cc b/main.cc index 21c2388ec..8a0e2f1ff 100644 --- a/main.cc +++ b/main.cc @@ -2,6 +2,7 @@ #include // for PATH_MAX #include #include +#include #include #if defined(__APPLE__) && defined(__MACH__) @@ -12,6 +13,7 @@ #include // for readlink() #elif defined(_WIN32) #include +#undef max #else #error "Unsupported platform!" #endif