From 2c25ffc1543020a7ab4d64e04a60b8d89ca435ed Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 4 Apr 2024 12:42:02 +0700 Subject: [PATCH 1/2] bug: enable and fix the cache again --- context/llama_server_context.h | 1 - controllers/llamaCPP.cc | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/context/llama_server_context.h b/context/llama_server_context.h index 34cb74c7e..c8f7f634e 100644 --- a/context/llama_server_context.h +++ b/context/llama_server_context.h @@ -860,7 +860,6 @@ struct llama_server_context { void kv_cache_clear() { // clear the entire KV cache llama_kv_cache_clear(ctx); - clean_kv_cache = false; } void update_system_prompt() { diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc index c2bbd0d41..e01bc78d9 100644 --- a/controllers/llamaCPP.cc +++ b/controllers/llamaCPP.cc @@ -615,7 +615,7 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr jsonBody) { params.cont_batching = jsonBody->get("cont_batching", false).asBool(); this->clean_cache_threshold = jsonBody->get("clean_cache_threshold", 5).asInt(); - this->caching_enabled = jsonBody->get("caching_enabled", false).asBool(); + this->caching_enabled = jsonBody->get("caching_enabled", true).asBool(); this->user_prompt = jsonBody->get("user_prompt", "USER: ").asString(); this->ai_prompt = jsonBody->get("ai_prompt", "ASSISTANT: ").asString(); this->system_prompt = From dc77ce1c5d49faa4df5a8dc5806d8c86fe770a59 Mon Sep 17 00:00:00 2001 From: tikikun Date: Thu, 4 Apr 2024 13:14:00 +0700 Subject: [PATCH 2/2] bug: clean kv cache must be false all the time --- context/llama_server_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/context/llama_server_context.h b/context/llama_server_context.h index c8f7f634e..fe8564e34 100644 --- a/context/llama_server_context.h +++ b/context/llama_server_context.h @@ -471,7 +471,7 @@ struct llama_server_context { llama_batch batch; bool multimodal = false; - bool clean_kv_cache = true; + bool clean_kv_cache = false; bool all_slots_are_idle = false; bool add_bos_token = true;