From d5d9f31b284e037a11a3d495ad32f03ed19cd3e0 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 30 Jan 2024 21:34:49 +0700
Subject: [PATCH 1/2] bug: add request completion back to local level and
 remove task_id on outer scope

---
 controllers/llamaCPP.cc | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index a26eaa94d..75b5611c4 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -22,8 +22,9 @@ std::shared_ptr<inferenceState> create_inference_state(llamaCPP *instance) {
 // --------------------------------------------
 
 // Function to check if the model is loaded
-void check_model_loaded(llama_server_context &llama, const HttpRequestPtr &req,
-                        std::function<void(const HttpResponsePtr &)> &callback) {
+void check_model_loaded(
+    llama_server_context &llama, const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &callback) {
   if (!llama.model_loaded_external) {
     Json::Value jsonResp;
     jsonResp["message"] =
@@ -299,13 +300,10 @@ void llamaCPP::chatCompletion(
   LOG_INFO << "Current completion text";
   LOG_INFO << formatted_output;
 #endif
-  int task_id;
 
-  LOG_INFO << "Resolved request for task_id:" << task_id;
 
   if (is_streamed) {
     auto state = create_inference_state(this);
-    state->task_id = task_id;
     auto chunked_content_provider =
         [state, data](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (!state->is_streaming) {
@@ -386,9 +384,12 @@ void llamaCPP::chatCompletion(
   } else {
     Json::Value respData;
     auto resp = nitro_utils::nitroHttpResponse();
+    int task_id = llama.request_completion(data, false, false, -1);
+    LOG_INFO << "sent the non stream, waiting for respone";
     if (!json_value(data, "stream", false)) {
       std::string completion_text;
       task_result result = llama.next_result(task_id);
+      LOG_INFO << "Here is the result:" << result.error;
       if (!result.error && result.stop) {
         int prompt_tokens = result.result_json["tokens_evaluated"];
         int predicted_tokens = result.result_json["tokens_predicted"];

From 6040177c4ad6310cef56af934852ec78e204b6e9 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 30 Jan 2024 21:35:03 +0700
Subject: [PATCH 2/2] bug: add request completion back to local level and
 remove task_id on outer scope

---
 controllers/llamaCPP.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 75b5611c4..65071fc1f 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -301,7 +301,6 @@ void llamaCPP::chatCompletion(
   LOG_INFO << formatted_output;
 #endif
-
 
   if (is_streamed) {
     auto state = create_inference_state(this);
     auto chunked_content_provider =
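
Note on the series (sketch, not part of the patches above): in the second hunk of patch 1, task_id was declared in the outer scope of llamaCPP::chatCompletion, logged, and copied into state->task_id without ever being assigned from llama.request_completion(), i.e. it was read uninitialized. After the series, the non-streaming branch obtains and resolves the task entirely locally. A minimal sketch of that flow, assuming only the llama_server_context API visible in the diff (request_completion, next_result, task_result, result_json) plus a hypothetical "content" field for the generated text:

  // Non-streaming path after the fix (sketch only; the "content" field name
  // and the response assembly are assumptions, not taken from this series).
  int task_id = llama.request_completion(data, false, false, -1);  // enqueue the completion task
  task_result result = llama.next_result(task_id);                 // block until this task finishes
  if (!result.error && result.stop) {
    int prompt_tokens = result.result_json["tokens_evaluated"];    // prompt token count
    int predicted_tokens = result.result_json["tokens_predicted"]; // generated token count
    std::string completion_text = result.result_json["content"];   // assumed field name
    // ... assemble the JSON response from completion_text and the token counts
  }

Keeping task_id inside the else-branch ties its lifetime to the request that actually produced it, which is what "remove task_id on outer scope" in the commit messages refers to.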