diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 6ab37bdb7..d9aa26716 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -178,7 +178,7 @@ void llamaCPP::chatCompletion(
 
   if (jsonBody) {
     // Default values to enable auto caching
-    data["cache_prompt"] = true;
+    data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
 
     // Passing load value
@@ -390,6 +390,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
             .asInt();
     params.cont_batching = jsonBody.get("cont_batching", false).asBool();
 
+    this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
     this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
     this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 52c8b76a1..f480f7062 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1908,5 +1908,6 @@ class llamaCPP : public drogon::HttpController {
   std::string system_prompt;
   std::string pre_prompt;
   int repeat_last_n;
+  bool caching_enabled;
 };
 }; // namespace inferences
diff --git a/llama.cpp b/llama.cpp
index fe680e3d1..1f5cd8327 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit fe680e3d1080a765e5d3150ffd7bab189742898d
+Subproject commit 1f5cd83275fabb43f2ae92c30033b384a3eb37b4
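
For context, a minimal client-side sketch (an illustration, not part of this change) of the load-model body that loadModelImpl() parses after this patch. Only "caching_enabled" is new here; "cont_batching" and the prompt fields already existed in the same function. When caching_enabled is true, llamaCPP::chatCompletion() forwards it as cache_prompt, so llama.cpp can reuse the KV cache for a shared prompt prefix across requests; with the default of false, prompt caching stays off.

    // Sketch: build the JSON body sent to the load-model endpoint,
    // using jsoncpp (Json::Value), the same library loadModelImpl() reads from.
    #include <json/json.h>
    #include <iostream>

    int main() {
      Json::Value body;
      body["caching_enabled"] = true;   // new flag; defaults to false if omitted
      body["cont_batching"] = false;    // existing option, unchanged by this diff
      body["user_prompt"] = "USER: ";   // existing prompt-template options
      body["ai_prompt"] = "ASSISTANT: ";

      // With caching_enabled = true, each chat completion request will carry
      // data["cache_prompt"] = true down to the llama.cpp server code.
      std::cout << body.toStyledString() << std::endl;
      return 0;
    }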