1 change: 1 addition & 0 deletions README.md
@@ -109,6 +109,7 @@ Table of parameters
| `cpu_threads` | Integer | The number of threads to use for inferencing (CPU MODE ONLY) |
| `n_batch` | Integer | The batch size for prompt eval step |
| `caching_enabled` | Boolean | To enable prompt caching or not |
| `clean_cache_threshold` | Integer | Number of chats after which the prompt cache is cleared (see the example below) |

***OPTIONAL***: You can run Nitro on a different port (for example 5000 instead of 3928) by starting it manually from the terminal.
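As a usage sketch of the new parameter: the request below loads a model with prompt caching enabled and clears the cache every 10 chats. The `/inferences/llamacpp/loadmodel` endpoint and the `llama_model_path` field are assumed from the existing load-model API and are not part of this change; adjust them to your setup.

```zsh
# Hypothetical example: enable prompt caching and clear the KV cache every 10 chats.
# Endpoint path and model path are assumptions; adjust to your setup.
curl http://localhost:3928/inferences/llamacpp/loadmodel \
  -H 'Content-Type: application/json' \
  -d '{
    "llama_model_path": "/path/to/model.gguf",
    "caching_enabled": true,
    "clean_cache_threshold": 10
  }'
```

With these settings, the server clears llama.cpp's KV cache after every 10th chat completion, as implemented in `chatCompletion` below.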
10 changes: 10 additions & 0 deletions controllers/llamaCPP.cc
@@ -177,6 +177,14 @@ void llamaCPP::chatCompletion(
// To set default value

if (jsonBody) {
// Increment the chat counter and clear the KV cache once the threshold is reached
no_of_chats++;
if (no_of_chats % clean_cache_threshold == 0) {
LOG_INFO << "Clean cache threshold reached!";
llama.kv_cache_clear();
LOG_INFO << "Cache cleaned";
}

// Default values to enable auto caching
data["cache_prompt"] = caching_enabled;
data["n_keep"] = -1;
@@ -390,6 +398,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
.asInt();
params.cont_batching = jsonBody.get("cont_batching", false).asBool();

this->clean_cache_threshold =
jsonBody.get("clean_cache_threshold", 5).asInt();
this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
2 changes: 2 additions & 0 deletions controllers/llamaCPP.h
@@ -1909,5 +1909,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
std::string pre_prompt;
int repeat_last_n;
bool caching_enabled;
std::atomic<int> no_of_chats = 0;
int clean_cache_threshold;
};
}; // namespace inferences