diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 6ab37bdb7..d9aa26716 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -178,7 +178,7 @@ void llamaCPP::chatCompletion(
 
   if (jsonBody) {
     // Default values to enable auto caching
-    data["cache_prompt"] = true;
+    data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
 
     // Passing load value
@@ -390,6 +390,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
             .asInt();
     params.cont_batching = jsonBody.get("cont_batching", false).asBool();
 
+    this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
     this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
     this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 52c8b76a1..f480f7062 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1908,5 +1908,6 @@ class llamaCPP : public drogon::HttpController {
   std::string system_prompt;
   std::string pre_prompt;
   int repeat_last_n;
+  bool caching_enabled;
 };
 }; // namespace inferences
diff --git a/llama.cpp b/llama.cpp
index fe680e3d1..1f5cd8327 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit fe680e3d1080a765e5d3150ffd7bab189742898d
+Subproject commit 1f5cd83275fabb43f2ae92c30033b384a3eb37b4
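
For context, a minimal client-side sketch (an illustration, not part of this change) of the load-model body that loadModelImpl() parses after this patch. Only "caching_enabled" is new here; "cont_batching" and the prompt fields already existed in the same function. When caching_enabled is true, llamaCPP::chatCompletion() forwards it as cache_prompt, so llama.cpp can reuse the KV cache for a shared prompt prefix across requests; with the default of false, prompt caching stays off.

    // Sketch: build the JSON body sent to the load-model endpoint,
    // using jsoncpp (Json::Value), the same library loadModelImpl() reads from.
    #include <json/json.h>
    #include <iostream>

    int main() {
      Json::Value body;
      body["caching_enabled"] = true;   // new flag; defaults to false if omitted
      body["cont_batching"] = false;    // existing option, unchanged by this diff
      body["user_prompt"] = "USER: ";   // existing prompt-template options
      body["ai_prompt"] = "ASSISTANT: ";

      // With caching_enabled = true, each chat completion request will carry
      // data["cache_prompt"] = true down to the llama.cpp server code.
      std::cout << body.toStyledString() << std::endl;
      return 0;
    }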