 #include <fstream>
 #include <iostream>
 #include "log.h"
-#include "utils/nitro_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/nitro_utils.h"
 
 // External
 #include "common.h"
@@ -210,7 +210,8 @@ void llamaCPP::InferenceImpl(
 
   // Passing load value
   data["repeat_last_n"] = this->repeat_last_n;
-  LOG_INFO_REQUEST(request_id) << "Stop words:" << completion.stop.toStyledString();
+  LOG_INFO_REQUEST(request_id)
+      << "Stop words:" << completion.stop.toStyledString();
 
   data["stream"] = completion.stream;
   data["n_predict"] = completion.max_tokens;
@@ -269,7 +270,8 @@ void llamaCPP::InferenceImpl(
           auto image_url = content_piece["image_url"]["url"].asString();
           std::string base64_image_data;
           if (image_url.find("http") != std::string::npos) {
-            LOG_INFO_REQUEST(request_id) << "Remote image detected but not supported yet";
+            LOG_INFO_REQUEST(request_id)
+                << "Remote image detected but not supported yet";
           } else if (image_url.find("data:image") != std::string::npos) {
             LOG_INFO_REQUEST(request_id) << "Base64 image detected";
             base64_image_data = nitro_utils::extractBase64(image_url);
@@ -330,29 +332,34 @@ void llamaCPP::InferenceImpl(
   if (is_streamed) {
     LOG_INFO_REQUEST(request_id) << "Streamed, waiting for respone";
     auto state = create_inference_state(this);
+    bool is_first_token = true;
     auto chunked_content_provider =
-        [state, data, request_id](char* pBuffer, std::size_t nBuffSize) -> std::size_t {
+        [state, data, request_id, &is_first_token](
+            char* pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (state->inference_status == PENDING) {
         state->inference_status = RUNNING;
       } else if (state->inference_status == FINISHED) {
         return 0;
       }
 
       if (!pBuffer) {
-        LOG_WARN_REQUEST(request_id) "Connection closed or buffer is null. Reset context";
+        LOG_WARN_REQUEST(request_id)
+        "Connection closed or buffer is null. Reset context";
         state->inference_status = FINISHED;
         return 0;
       }
 
       if (state->inference_status == EOS) {
         LOG_INFO_REQUEST(request_id) << "End of result";
+        is_first_token = true;
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_", "",
                                "stop") +
             "\n\n" + "data: [DONE]" + "\n\n";
 
-        LOG_VERBOSE("data stream", {{"request_id": request_id}, {"to_send", str}});
+        LOG_VERBOSE("data stream",
+                    {{"request_id": request_id}, {"to_send", str}});
         std::size_t nRead = std::min(str.size(), nBuffSize);
         memcpy(pBuffer, str.data(), nRead);
         state->inference_status = FINISHED;
@@ -361,7 +368,13 @@ void llamaCPP::InferenceImpl(
 
       task_result result = state->instance->llama.next_result(state->task_id);
       if (!result.error) {
-        const std::string to_send = result.result_json["content"];
+        std::string to_send = result.result_json["content"];
+
+        // trim the leading space if it is the first token
+        if (std::exchange(is_first_token, false)) {
+          nitro_utils::ltrim(to_send);
+        }
+
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_",
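An aside on the std::exchange idiom introduced above: std::exchange(is_first_token, false) returns the flag's previous value while storing false, so the leading-space trim runs only for the very first streamed token. A minimal, self-contained illustration (generic C++, not project code; note that std::exchange lives in <utility>):

    #include <cassert>
    #include <utility>

    int main() {
      bool is_first_token = true;
      // First chunk: std::exchange returns the old value (true) and clears the flag.
      assert(std::exchange(is_first_token, false));
      // Later chunks: the flag is already false, so the trim branch is skipped.
      assert(!std::exchange(is_first_token, false));
      return 0;
    }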
@@ -412,7 +425,8 @@ void llamaCPP::InferenceImpl(
         retries += 1;
       }
       if (state->inference_status != RUNNING)
-        LOG_INFO_REQUEST(request_id) << "Wait for task to be released:" << state->task_id;
+        LOG_INFO_REQUEST(request_id)
+            << "Wait for task to be released:" << state->task_id;
       std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
     LOG_INFO_REQUEST(request_id) << "Task completed, release it";
@@ -431,10 +445,12 @@ void llamaCPP::InferenceImpl(
     if (!result.error && result.stop) {
       int prompt_tokens = result.result_json["tokens_evaluated"];
       int predicted_tokens = result.result_json["tokens_predicted"];
-      std::string full_return =
-          create_full_return_json(nitro_utils::generate_random_string(20),
-                                  "_", result.result_json["content"], "_",
-                                  prompt_tokens, predicted_tokens);
+      std::string to_send = result.result_json["content"];
+      nitro_utils::ltrim(to_send);
+      std::string full_return = create_full_return_json(
+          nitro_utils::generate_random_string(20), "_", to_send, "_",
+          prompt_tokens, predicted_tokens);
+
       resp->setBody(full_return);
     } else {
       respData["message"] = "Internal error during inference";
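Both the streamed and non-streamed paths now left-trim the response content with nitro_utils::ltrim, whose definition is not part of this diff. The call sites suggest an in-place trim of leading whitespace; a minimal sketch of such a helper (an assumption, the actual nitro_utils implementation may differ):

    #include <cctype>
    #include <string>

    // Hypothetical stand-in for nitro_utils::ltrim: erase leading whitespace in place.
    inline void ltrim(std::string& s) {
      std::size_t i = 0;
      while (i < s.size() && std::isspace(static_cast<unsigned char>(s[i]))) {
        ++i;
      }
      s.erase(0, i);
    }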
@@ -468,7 +484,8 @@ void llamaCPP::EmbeddingImpl(
   // Queue embedding task
   auto state = create_inference_state(this);
 
-  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback, request_id]() {
+  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback,
+                                          request_id]() {
     Json::Value responseData(Json::arrayValue);
 
     if (jsonBody->isMember("input")) {
@@ -542,7 +559,7 @@ void llamaCPP::ModelStatus(
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
   callback(resp);
   LOG_INFO << "Model status responded";
-  }
+  }
 }
 
 void llamaCPP::LoadModel(
@@ -552,10 +569,12 @@ void llamaCPP::LoadModel(
   if (!nitro_utils::isAVX2Supported() && ggml_cpu_has_avx2()) {
     LOG_ERROR << "AVX2 is not supported by your processor";
     Json::Value jsonResp;
-    jsonResp["message"] = "AVX2 is not supported by your processor, please download and replace the correct Nitro asset version";
+    jsonResp["message"] =
+        "AVX2 is not supported by your processor, please download and replace "
+        "the correct Nitro asset version";
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    callback(resp);
     return;
   }
 
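The guard above pairs a runtime CPU check (nitro_utils::isAVX2Supported) with ggml's build-time report (ggml_cpu_has_avx2), so an AVX2 build fails fast with an HTTP 500 instead of dying on an illegal instruction when the host CPU lacks AVX2. The runtime side can be as small as the sketch below, which uses the GCC/Clang builtin; the actual nitro_utils helper may be implemented differently (e.g. via raw CPUID):

    // Hypothetical runtime probe; checks the executing CPU, not the compile flags.
    inline bool isAVX2Supported() {
    #if defined(__x86_64__) || defined(__i386__)
      return __builtin_cpu_supports("avx2");
    #else
      return false;  // no AVX2 to probe on non-x86 targets
    #endif
    }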
@@ -589,10 +608,8 @@ void llamaCPP::LoadModel(
 
 bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   gpt_params params;
-  LOG_INFO << "Start loading model";
   // By default will setting based on number of handlers
   if (jsonBody) {
-    LOG_DEBUG << "Start parsing jsonBody";
     if (!jsonBody->operator[]("mmproj").isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
       params.mmproj = jsonBody->operator[]("mmproj").asString();
@@ -624,7 +641,8 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
     if (model_path.isNull()) {
       LOG_ERROR << "Missing model path in request";
     } else {
-      if (std::filesystem::exists(std::filesystem::path(model_path.asString()))) {
+      if (std::filesystem::exists(
+              std::filesystem::path(model_path.asString()))) {
         params.model = model_path.asString();
       } else {
         LOG_ERROR << "Could not find model in path " << model_path.asString();