From d5d9f31b284e037a11a3d495ad32f03ed19cd3e0 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 30 Jan 2024 21:34:49 +0700
Subject: [PATCH 1/2] bug: add request completion back to local level and
 remove task_id on outer scope

---
 controllers/llamaCPP.cc | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index a26eaa94d..75b5611c4 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -22,8 +22,9 @@ std::shared_ptr<inferenceState> create_inference_state(llamaCPP *instance) {
 // --------------------------------------------
 
 // Function to check if the model is loaded
-void check_model_loaded(llama_server_context &llama, const HttpRequestPtr &req,
-                        std::function<void(const HttpResponsePtr &)> &callback) {
+void check_model_loaded(
+    llama_server_context &llama, const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &callback) {
   if (!llama.model_loaded_external) {
     Json::Value jsonResp;
     jsonResp["message"] =
@@ -299,13 +300,10 @@ void llamaCPP::chatCompletion(
   LOG_INFO << "Current completion text";
   LOG_INFO << formatted_output;
 #endif
-  int task_id;
 
-  LOG_INFO << "Resolved request for task_id:" << task_id;
 
   if (is_streamed) {
     auto state = create_inference_state(this);
-    state->task_id = task_id;
     auto chunked_content_provider =
         [state, data](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (!state->is_streaming) {
@@ -386,9 +384,12 @@ void llamaCPP::chatCompletion(
   } else {
     Json::Value respData;
     auto resp = nitro_utils::nitroHttpResponse();
+    int task_id = llama.request_completion(data, false, false, -1);
+    LOG_INFO << "sent the non stream, waiting for respone";
     if (!json_value(data, "stream", false)) {
       std::string completion_text;
       task_result result = llama.next_result(task_id);
+      LOG_INFO << "Here is the result:" << result.error;
       if (!result.error && result.stop) {
         int prompt_tokens = result.result_json["tokens_evaluated"];
         int predicted_tokens = result.result_json["tokens_predicted"];

From 6040177c4ad6310cef56af934852ec78e204b6e9 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Tue, 30 Jan 2024 21:35:03 +0700
Subject: [PATCH 2/2] bug: add request completion back to local level and
 remove task_id on outer scope

---
 controllers/llamaCPP.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 75b5611c4..65071fc1f 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -301,7 +301,6 @@ void llamaCPP::chatCompletion(
   LOG_INFO << formatted_output;
 #endif
-
 
   if (is_streamed) {
     auto state = create_inference_state(this);
     auto chunked_content_provider =
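
Note on the series (sketch, not part of the patches above): in the second hunk of patch 1, task_id was declared in the outer scope of llamaCPP::chatCompletion, logged, and copied into state->task_id without ever being assigned from llama.request_completion(), i.e. it was read uninitialized. After the series, the non-streaming branch obtains and resolves the task entirely locally. A minimal sketch of that flow, assuming only the llama_server_context API visible in the diff (request_completion, next_result, task_result, result_json) plus a hypothetical "content" field for the generated text:

  // Non-streaming path after the fix (sketch only; the "content" field name
  // and the response assembly are assumptions, not taken from this series).
  int task_id = llama.request_completion(data, false, false, -1);  // enqueue the completion task
  task_result result = llama.next_result(task_id);                 // block until this task finishes
  if (!result.error && result.stop) {
    int prompt_tokens = result.result_json["tokens_evaluated"];    // prompt token count
    int predicted_tokens = result.result_json["tokens_predicted"]; // generated token count
    std::string completion_text = result.result_json["content"];   // assumed field name
    // ... assemble the JSON response from completion_text and the token counts
  }

Keeping task_id inside the else-branch ties its lifetime to the request that actually produced it, which is what "remove task_id on outer scope" in the commit messages refers to.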