From a4c0b8ec83195a7a1afed2b32e625b8a4e981525 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 11 Dec 2023 16:01:51 +0700
Subject: [PATCH 1/4] number of chats completed

---
 controllers/llamaCPP.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index f480f7062..e1e3a6309 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1909,5 +1909,6 @@ class llamaCPP : public drogon::HttpController {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
+  std::atomic no_of_chats = 0;
 };
 }; // namespace inferences

From 2f179bc80d17cc05a5c1c3ad5853b6db453155d3 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 11 Dec 2023 16:49:42 +0700
Subject: [PATCH 2/4] load model with clean_cache_threshold

---
 controllers/llamaCPP.cc | 11 +++++++++++
 controllers/llamaCPP.h  |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index d9aa26716..dec8716a2 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include

 using namespace inferences;
 using json = nlohmann::json;
@@ -177,6 +178,14 @@ void llamaCPP::chatCompletion(
   // To set default value
   if (jsonBody) {
+    // Increase number of chats received and clean the prompt
+    no_of_chats++;
+    if (no_of_chats % clean_cache_threshold == 0) {
+      LOG_INFO << "Clean cache threshold reached!";
+      llama.kv_cache_clear();
+      LOG_INFO << "Cache cleaned";
+    }
+
     // Default values to enable auto caching
     data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
@@ -390,6 +399,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
           .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();
+  this->clean_cache_threshold =
+      jsonBody.get("clean_cache_threshold", 5).asInt();
   this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index e1e3a6309..2528056fd 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1909,6 +1909,7 @@ class llamaCPP : public drogon::HttpController {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
-  std::atomic no_of_chats = 0;
+  std::atomic no_of_chats = 0;
+  int clean_cache_threshold;
 };
 }; // namespace inferences

From 4d96e59cd972ba479e0661efe7a78cc8d3ec85e9 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 11 Dec 2023 16:50:49 +0700
Subject: [PATCH 3/4] redundant

---
 controllers/llamaCPP.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index dec8716a2..84c323da1 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include

 using namespace inferences;
 using json = nlohmann::json;

From 5aa10f9cc8d96c3b04270ac5c1eff39c9dd8d6fe Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 11 Dec 2023 16:52:10 +0700
Subject: [PATCH 4/4] clean cache threshold docs update

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 3272fb249..759932e17 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,7 @@ Table of parameters
 | `cpu_threads` | Integer | The number of threads to use for inferencing (CPU MODE ONLY) |
 | `n_batch` | Integer | The batch size for prompt eval step |
 | `caching_enabled` | Boolean | To enable prompt caching or not |
+| `clean_cache_threshold` | Integer | Number of chats that will trigger clean cache action |

 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal
 ```zsh
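
A minimal usage sketch of the new parameter: `loadModelImpl` reads `clean_cache_threshold` from the load-model request body with a default of 5, and `chatCompletion` then calls `llama.kv_cache_clear()` once every that many chats. The endpoint path and model path below are assumptions for illustration and are not taken from these patches; only `caching_enabled` and `clean_cache_threshold` come from the diffs above.

```zsh
# Hypothetical request: endpoint and model path are placeholders, not confirmed by this patch set.
# Sets the KV cache to be cleared after every 10 chat completions (default is 5 if omitted).
curl http://localhost:3928/inferences/llamacpp/loadmodel \
  -H 'Content-Type: application/json' \
  -d '{
    "llama_model_path": "/path/to/model.gguf",
    "caching_enabled": true,
    "clean_cache_threshold": 10
  }'
```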