From f19d88352135b2c8798158ab6fa319cd993fc494 Mon Sep 17 00:00:00 2001
From: Cam Ng
Date: Wed, 10 Apr 2024 14:12:25 +0700
Subject: [PATCH] fix: Refactor code for content-type responses

---
 controllers/llamaCPP.cc | 21 +++++++--------------
 utils/nitro_utils.h     |  4 ++--
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index eaea1e9c8..6dff90090 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -77,7 +77,7 @@ Json::Value create_embedding_payload(const std::vector<float>& embedding,
   return dataItem;
 }
 
-std::string create_full_return_json(const std::string& id,
+Json::Value create_full_return_json(const std::string& id,
                                     const std::string& model,
                                     const std::string& content,
                                     const std::string& system_fingerprint,
@@ -110,9 +110,7 @@ std::string create_full_return_json(const std::string& id,
   usage["total_tokens"] = prompt_tokens + completion_tokens;
   root["usage"] = usage;
 
-  Json::StreamWriterBuilder writer;
-  writer["indentation"] = ""; // Compact output
-  return Json::writeString(writer, root);
+  return root;
 }
 
 std::string create_return_json(const std::string& id, const std::string& model,
@@ -422,7 +420,6 @@ void llamaCPP::InferenceImpl(
     });
   } else {
     Json::Value respData;
-    auto resp = nitro_utils::nitroHttpResponse();
     int task_id = llama.request_completion(data, false, false, -1);
     LOG_INFO_REQUEST(request_id) << "Non stream, waiting for respone";
     if (!json_value(data, "stream", false)) {
@@ -431,16 +428,14 @@ void llamaCPP::InferenceImpl(
       if (!result.error && result.stop) {
         int prompt_tokens = result.result_json["tokens_evaluated"];
         int predicted_tokens = result.result_json["tokens_predicted"];
-        std::string full_return =
-            create_full_return_json(nitro_utils::generate_random_string(20),
-                                    "_", result.result_json["content"], "_",
-                                    prompt_tokens, predicted_tokens);
-        resp->setBody(full_return);
+        respData = create_full_return_json(nitro_utils::generate_random_string(20),
+                                           "_", result.result_json["content"], "_",
+                                           prompt_tokens, predicted_tokens);
       } else {
         respData["message"] = "Internal error during inference";
-        resp = nitro_utils::nitroHttpJsonResponse(respData);
         LOG_ERROR_REQUEST(request_id) << "Error during inference";
       }
+      auto resp = nitro_utils::nitroHttpJsonResponse(respData);
       callback(resp);
       LOG_INFO_REQUEST(request_id) << "Inference completed";
     }
@@ -496,7 +491,6 @@ void llamaCPP::EmbeddingImpl(
       }
     }
 
-    auto resp = nitro_utils::nitroHttpResponse();
     Json::Value root;
     root["data"] = responseData;
     root["model"] = "_";
@@ -506,8 +500,7 @@ void llamaCPP::EmbeddingImpl(
     usage["total_tokens"] = 0;
     root["usage"] = usage;
 
-    resp->setBody(Json::writeString(Json::StreamWriterBuilder(), root));
-    resp->setContentTypeString("application/json");
+    auto resp = nitro_utils::nitroHttpJsonResponse(root);
     callback(resp);
     LOG_INFO_REQUEST(request_id) << "Embedding completed";
   });
diff --git a/utils/nitro_utils.h b/utils/nitro_utils.h
index c10e713a9..3957af6eb 100644
--- a/utils/nitro_utils.h
+++ b/utils/nitro_utils.h
@@ -246,7 +246,7 @@ inline void nitro_logo() {
 }
 
 inline drogon::HttpResponsePtr nitroHttpResponse() {
-  auto resp = drogon::HttpResponse::newHttpResponse(drogon::k200OK, drogon::CT_APPLICATION_JSON);
+  auto resp = drogon::HttpResponse::newHttpResponse();
 #ifdef ALLOW_ALL_CORS
   LOG_INFO << "Respond for all cors!";
   resp->addHeader("Access-Control-Allow-Origin", "*");
@@ -260,7 +260,7 @@ inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value &data) {
   LOG_INFO << "Respond for all cors!";
   resp->addHeader("Access-Control-Allow-Origin", "*");
 #endif
-  resp->setContentTypeString("application/json");
+  // Drogon already set the content-type header to "application/json"
   return resp;
 };
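
For context, the change rests on the assumption that a response created with
drogon::HttpResponse::newHttpJsonResponse() already carries the
"application/json" content type, so the helpers no longer need to set it by
hand. Below is a minimal standalone sketch of that assumption; it is not part
of the patch, and the body contents are illustrative only.

#include <drogon/HttpResponse.h>
#include <json/json.h>
#include <cassert>

int main() {
  // Build a small JSON body, as the controllers do before responding.
  Json::Value body;
  body["message"] = "ok";

  // newHttpJsonResponse() serializes the body and sets the content type
  // itself, which is why the patch drops the manual setContentTypeString()
  // and setBody(Json::writeString(...)) calls.
  auto resp = drogon::HttpResponse::newHttpJsonResponse(body);
  assert(resp->contentType() == drogon::CT_APPLICATION_JSON);
  return 0;
}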