diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index e4f33a27e..69284d6e9 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -190,8 +190,7 @@ void llamaCPP::InferenceImpl(
   if (llama.model_type == ModelType::EMBEDDING) {
     LOG_WARN << "Not support completion for embedding model";
     Json::Value jsonResp;
-    jsonResp["message"] =
-        "Not support completion for embedding model";
+    jsonResp["message"] = "Not support completion for embedding model";
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     resp->setStatusCode(drogon::k400BadRequest);
     callback(resp);
@@ -429,7 +428,8 @@ void llamaCPP::InferenceImpl(
     // Since this is an async task, we will wait for the task to be
     // completed
-    while (state->inference_status != FINISHED && retries < 10) {
+    while (state->inference_status != FINISHED && retries < 10 &&
+           state->instance->llama.model_loaded_external) {
       // Should wait chunked_content_provider lambda to be called within
       // 3s
       if (state->inference_status == PENDING) {
@@ -748,9 +748,10 @@ void llamaCPP::StopBackgroundTask() {
   if (llama.model_loaded_external) {
     llama.model_loaded_external = false;
     llama.condition_tasks.notify_one();
-    LOG_INFO << "Background task stopped! ";
+    LOG_INFO << "Stopping background task! ";
    if (backgroundThread.joinable()) {
      backgroundThread.join();
    }
+    LOG_INFO << "Background task stopped! ";
  }
}
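
Note on the two behavioral changes above: the extra `state->instance->llama.model_loaded_external` check lets the async wait loop bail out as soon as the model is unloaded, instead of spinning through the full retry budget during shutdown, and the reordered logs in `StopBackgroundTask()` reflect that `join()` can block, so "stopped" is only accurate after it returns. Below is a minimal, self-contained sketch of both patterns, not Nitro's actual code; names such as `worker_alive`, `inference_status`, `WaitForInference`, and `StopWorker` are illustrative placeholders, not identifiers from llamaCPP.cc.

    // Sketch of (1) a bounded wait loop that also exits when the worker is
    // torn down, and (2) a notify / join / log shutdown sequence.
    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>

    enum class Status { PENDING, RUNNING, FINISHED };

    std::atomic<bool> worker_alive{true};       // placeholder for model_loaded_external
    std::atomic<Status> inference_status{Status::PENDING};
    std::condition_variable cv;
    std::mutex mtx;
    std::thread worker;

    void WaitForInference() {
      int retries = 0;
      // Mirror of the patched loop: give up after 10 retries, but also exit
      // immediately if the worker has been shut down in the meantime.
      while (inference_status != Status::FINISHED && retries < 10 &&
             worker_alive) {
        if (inference_status == Status::PENDING) {
          ++retries;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(300));
      }
    }

    void StopWorker() {
      if (worker_alive) {
        worker_alive = false;
        cv.notify_one();  // wake the worker if it is blocked on the condition
        // Log before joining: join() blocks until the worker finishes, so the
        // "stopped" message must come after it returns.
        std::cout << "Stopping background task!\n";
        if (worker.joinable()) {
          worker.join();
        }
        std::cout << "Background task stopped!\n";
      }
    }

    int main() {
      worker = std::thread([] {
        // Simulated background task: sleep until shutdown is requested.
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [] { return !worker_alive.load(); });
      });
      std::thread waiter(WaitForInference);
      StopWorker();
      waiter.join();
      return 0;
    }

Bracketing the join with two log lines is a cheap diagnostic: if only "Stopping background task!" ever appears in the log, the background thread is stuck and never exited its task loop.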