From e6a6b01e119d6e90958102ac759842f9bcae40a9 Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Thu, 21 Dec 2023 22:05:12 +0700
Subject: [PATCH 1/7] remove usage of drogon thread to infer n_parallel

---
 controllers/llamaCPP.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index d30fd93b5..834c8ec45 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -434,8 +434,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   gpt_params params;
 
   // By default will setting based on number of handlers
-  int drogon_thread = drogon::app().getThreadNum() - 5;
-  LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     if (!jsonBody["mmproj"].isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
@@ -447,7 +445,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     params.embedding = jsonBody.get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
     params.n_batch = jsonBody.get("n_batch", 512).asInt();
-    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", 1).asInt();
     params.n_threads =
         jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();

From 0b4d08693d3977f7e1f7b5c07e4961e58cd0fb08 Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Thu, 21 Dec 2023 22:05:45 +0700
Subject: [PATCH 2/7] Use at least the number of logical cores as the Drogon thread count

---
 main.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/main.cc b/main.cc
index 0c4534493..b2849f248 100644
--- a/main.cc
+++ b/main.cc
@@ -1,7 +1,9 @@
 #include "utils/nitro_utils.h"
-#include <climits> // for PATH_MAX
+#include <algorithm> // Include the algorithm header
+#include <climits>   // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
+#include <thread>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h> // for dirname()
@@ -35,11 +37,12 @@ int main(int argc, char *argv[]) {
     port = std::atoi(argv[3]); // Convert string argument to int
   }
 
+  int logical_cores = std::thread::hardware_concurrency();
   nitro_utils::nitro_logo();
   LOG_INFO << "Server started, listening at: " << host << ":" << port;
   LOG_INFO << "Please load your model";
   drogon::app().addListener(host, port);
-  drogon::app().setThreadNum(thread_num + 5);
+  drogon::app().setThreadNum(std::max(thread_num, logical_cores));
   LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
 
   drogon::app().run();

From e6c33c486a669bfdf6f27019e87da75f4dacc11f Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Thu, 21 Dec 2023 22:07:52 +0700
Subject: [PATCH 3/7] remove redundant include

---
 main.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/main.cc b/main.cc
index b2849f248..5b16f98de 100644
--- a/main.cc
+++ b/main.cc
@@ -1,9 +1,7 @@
 #include "utils/nitro_utils.h"
-#include <algorithm> // Include the algorithm header
 #include <climits>   // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
-#include <thread>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h> // for dirname()

From 3058054ce0681fb1a4cdee424f923a94ba17af76 Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Thu, 21 Dec 2023 22:08:19 +0700
Subject: [PATCH 4/7] normalize spacing of the climits include comment

---
 main.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.cc b/main.cc
index 5b16f98de..f0bcb7fb8 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
 #include "utils/nitro_utils.h"
-#include <climits>   // for PATH_MAX
+#include <climits> // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
 

From bf4025b0414d227dd4b4e8b12861ba9cec620846 Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Thu, 21 Dec 2023 22:27:26 +0700
Subject: [PATCH 5/7] store the computed thread count in drogon_thread_num

---
 main.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/main.cc b/main.cc
index f0bcb7fb8..254a56fbb 100644
--- a/main.cc
+++ b/main.cc
@@ -36,11 +36,12 @@ int main(int argc, char *argv[]) {
   }
 
   int logical_cores = std::thread::hardware_concurrency();
+  int drogon_thread_num = std::max(thread_num, logical_cores);
   nitro_utils::nitro_logo();
   LOG_INFO << "Server started, listening at: " << host << ":" << port;
   LOG_INFO << "Please load your model";
   drogon::app().addListener(host, port);
-  drogon::app().setThreadNum(std::max(thread_num, logical_cores));
+  drogon::app().setThreadNum(drogon_thread_num);
   LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
 
   drogon::app().run();

From fee822f190bf6d9a9a4a81802b57e9eff8e4f838 Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Fri, 22 Dec 2023 07:35:02 +0700
Subject: [PATCH 6/7] add <algorithm> include as a possible compatibility fix

---
 main.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/main.cc b/main.cc
index 254a56fbb..21c2388ec 100644
--- a/main.cc
+++ b/main.cc
@@ -2,6 +2,7 @@
 #include <climits> // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
+#include <algorithm>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h> // for dirname()

From 97df4b9ecfbee7495c1926513e1b7dedea97fe8e Mon Sep 17 00:00:00 2001
From: tikikun <daogiatuank54@gmail.com>
Date: Fri, 22 Dec 2023 07:56:20 +0700
Subject: [PATCH 7/7] try fix: include <iostream> and #undef max after <windows.h>

---
 main.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/main.cc b/main.cc
index 21c2388ec..8a0e2f1ff 100644
--- a/main.cc
+++ b/main.cc
@@ -2,6 +2,7 @@
 #include <climits> // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
+#include <iostream>
 #include <algorithm>
 
 #if defined(__APPLE__) && defined(__MACH__)
@@ -12,6 +13,7 @@
 #include <unistd.h> // for readlink()
 #elif defined(_WIN32)
 #include <windows.h>
+#undef max
 #else
 #error "Unsupported platform!"
 #endif