
Commit 0739694

sangjanai authored and vansangpfiev committed
bug: trim the leading space
1 parent 50c03c9 commit 0739694

9 files changed: +88 additions, -23 deletions


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -103,4 +103,4 @@ target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC} ${CONTEXT_SRC})
 # ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC})
 # ##############################################################################
 # uncomment the following line for dynamically loading views set_property(TARGET
-# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)
+# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)

controllers/llamaCPP.cc

Lines changed: 34 additions & 16 deletions
@@ -3,8 +3,8 @@
 #include <fstream>
 #include <iostream>
 #include "log.h"
-#include "utils/nitro_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/nitro_utils.h"
 
 // External
 #include "common.h"
@@ -208,7 +208,8 @@ void llamaCPP::InferenceImpl(
 
   // Passing load value
   data["repeat_last_n"] = this->repeat_last_n;
-  LOG_INFO_REQUEST(request_id) << "Stop words:" << completion.stop.toStyledString();
+  LOG_INFO_REQUEST(request_id)
+      << "Stop words:" << completion.stop.toStyledString();
 
   data["stream"] = completion.stream;
   data["n_predict"] = completion.max_tokens;
@@ -267,7 +268,8 @@ void llamaCPP::InferenceImpl(
         auto image_url = content_piece["image_url"]["url"].asString();
         std::string base64_image_data;
         if (image_url.find("http") != std::string::npos) {
-          LOG_INFO_REQUEST(request_id) << "Remote image detected but not supported yet";
+          LOG_INFO_REQUEST(request_id)
+              << "Remote image detected but not supported yet";
         } else if (image_url.find("data:image") != std::string::npos) {
           LOG_INFO_REQUEST(request_id) << "Base64 image detected";
           base64_image_data = nitro_utils::extractBase64(image_url);
@@ -328,29 +330,34 @@ void llamaCPP::InferenceImpl(
   if (is_streamed) {
     LOG_INFO_REQUEST(request_id) << "Streamed, waiting for respone";
     auto state = create_inference_state(this);
+    bool is_first_token = true;
    auto chunked_content_provider =
-        [state, data, request_id](char* pBuffer, std::size_t nBuffSize) -> std::size_t {
+        [state, data, request_id, &is_first_token](
+            char* pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (state->inference_status == PENDING) {
         state->inference_status = RUNNING;
       } else if (state->inference_status == FINISHED) {
         return 0;
       }
 
       if (!pBuffer) {
-        LOG_WARN_REQUEST(request_id) "Connection closed or buffer is null. Reset context";
+        LOG_WARN_REQUEST(request_id)
+        "Connection closed or buffer is null. Reset context";
         state->inference_status = FINISHED;
         return 0;
       }
 
       if (state->inference_status == EOS) {
         LOG_INFO_REQUEST(request_id) << "End of result";
+        is_first_token = true;
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_", "",
                                "stop") +
             "\n\n" + "data: [DONE]" + "\n\n";
 
-        LOG_VERBOSE("data stream", {{"request_id": request_id}, {"to_send", str}});
+        LOG_VERBOSE("data stream",
+                    {{"request_id": request_id}, {"to_send", str}});
         std::size_t nRead = std::min(str.size(), nBuffSize);
         memcpy(pBuffer, str.data(), nRead);
         state->inference_status = FINISHED;
@@ -359,7 +366,13 @@ void llamaCPP::InferenceImpl(
 
       task_result result = state->instance->llama.next_result(state->task_id);
       if (!result.error) {
-        const std::string to_send = result.result_json["content"];
+        std::string to_send = result.result_json["content"];
+
+        // trim the leading space if it is the first token
+        if (std::exchange(is_first_token, false)) {
+          nitro_utils::ltrim(to_send);
+        }
+
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_",
@@ -410,7 +423,8 @@ void llamaCPP::InferenceImpl(
         retries += 1;
       }
       if (state->inference_status != RUNNING)
-        LOG_INFO_REQUEST(request_id) << "Wait for task to be released:" << state->task_id;
+        LOG_INFO_REQUEST(request_id)
+            << "Wait for task to be released:" << state->task_id;
       std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
     LOG_INFO_REQUEST(request_id) << "Task completed, release it";
@@ -428,8 +442,10 @@ void llamaCPP::InferenceImpl(
     if (!result.error && result.stop) {
       int prompt_tokens = result.result_json["tokens_evaluated"];
       int predicted_tokens = result.result_json["tokens_predicted"];
+      std::string to_send = result.result_json["content"];
+      nitro_utils::ltrim(to_send);
       respData = create_full_return_json(nitro_utils::generate_random_string(20),
-                                         "_", result.result_json["content"], "_",
+                                         "_", to_send, "_",
                                          prompt_tokens, predicted_tokens);
     } else {
       respData["message"] = "Internal error during inference";
@@ -463,7 +479,8 @@ void llamaCPP::EmbeddingImpl(
   // Queue embedding task
   auto state = create_inference_state(this);
 
-  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback, request_id]() {
+  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback,
+                                          request_id]() {
     Json::Value responseData(Json::arrayValue);
 
     if (jsonBody->isMember("input")) {
@@ -535,7 +552,7 @@ void llamaCPP::ModelStatus(
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     callback(resp);
     LOG_INFO << "Model status responded";
-  }
+  }
 }
 
 void llamaCPP::LoadModel(
@@ -545,10 +562,12 @@ void llamaCPP::LoadModel(
   if (!nitro_utils::isAVX2Supported() && ggml_cpu_has_avx2()) {
     LOG_ERROR << "AVX2 is not supported by your processor";
     Json::Value jsonResp;
-    jsonResp["message"] = "AVX2 is not supported by your processor, please download and replace the correct Nitro asset version";
+    jsonResp["message"] =
+        "AVX2 is not supported by your processor, please download and replace "
+        "the correct Nitro asset version";
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    callback(resp);
     return;
   }
 
@@ -582,10 +601,8 @@ void llamaCPP::LoadModel(
 
 bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   gpt_params params;
-  LOG_INFO << "Start loading model";
   // By default will setting based on number of handlers
   if (jsonBody) {
-    LOG_DEBUG << "Start parsing jsonBody";
     if (!jsonBody->operator[]("mmproj").isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
       params.mmproj = jsonBody->operator[]("mmproj").asString();
@@ -617,7 +634,8 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
     if (model_path.isNull()) {
       LOG_ERROR << "Missing model path in request";
     } else {
-      if (std::filesystem::exists(std::filesystem::path(model_path.asString()))) {
+      if (std::filesystem::exists(
+              std::filesystem::path(model_path.asString()))) {
        params.model = model_path.asString();
      } else {
        LOG_ERROR << "Could not find model in path " << model_path.asString();

nitro_deps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ ExternalProject_Add(
 ExternalProject_Add(
   gtest
   GIT_REPOSITORY https://github.com/google/googletest
-  GIT_TAG v1.14.0
+  GIT_TAG v1.14.0
   CMAKE_ARGS
   -DCMAKE_BUILD_TYPE=release
   -DCMAKE_PREFIX_PATH=${THIRD_PARTY_INSTALL_PATH}

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 
-add_subdirectory(models)
+add_subdirectory(components)

test/models/CMakeLists.txt renamed to test/components/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 file(GLOB SRCS *.cc)
-project(test-models)
+project(test-components)
 
 add_executable(${PROJECT_NAME} ${SRCS})
 
@@ -11,4 +11,4 @@ target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest:
 target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
 
 add_test(NAME ${PROJECT_NAME}
-  COMMAND ${PROJECT_NAME})
+  COMMAND ${PROJECT_NAME})

test/models/main.cc renamed to test/components/main.cc

Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,4 @@ int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
   int ret = RUN_ALL_TESTS();
   return ret;
-}
+}
File renamed without changes.
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+#include "gtest/gtest.h"
+#include "utils/nitro_utils.h"
+
+class NitroUtilTest : public ::testing::Test {
+};
+
+TEST_F(NitroUtilTest, left_trim) {
+  {
+    std::string empty;
+    nitro_utils::ltrim(empty);
+    EXPECT_EQ(empty, "");
+  }
+
+  {
+    std::string s = "abc";
+    std::string expected = "abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = " abc";
+    std::string expected = "abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = "1 abc 2 ";
+    std::string expected = "1 abc 2 ";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = " |abc";
+    std::string expected = "|abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+}
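
If broader coverage were wanted, the same fixture style would also handle a whitespace-only input; the case below is only an illustration of that pattern and is not part of this commit (it assumes the includes and the NitroUtilTest fixture defined in the file above).

// Hypothetical extra case, not in the commit: a whitespace-only string
// should be emptied entirely by ltrim.
TEST_F(NitroUtilTest, left_trim_only_whitespace) {
  std::string s = "  \t\n ";
  nitro_utils::ltrim(s);
  EXPECT_EQ(s, "");
}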

utils/nitro_utils.h

Lines changed: 7 additions & 1 deletion
@@ -165,7 +165,7 @@ inline std::string generate_random_string(std::size_t length) {
   std::random_device rd;
   std::mt19937 generator(rd());
 
-  std::uniform_int_distribution<> distribution(0, characters.size() - 1);
+  std::uniform_int_distribution<> distribution(0, static_cast<int>(characters.size()) - 1);
 
   std::string random_string(length, '\0');
   std::generate_n(random_string.begin(), length,
@@ -276,4 +276,10 @@ inline drogon::HttpResponsePtr nitroStreamResponse(
   return resp;
 }
 
+inline void ltrim(std::string& s) {
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+            return !std::isspace(ch);
+          }));
+};
+
 } // namespace nitro_utils
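
The new helper is the conventional erase/find_if left trim; taking the character as unsigned char before handing it to std::isspace keeps the behaviour defined for bytes outside the plain ASCII range. A minimal usage sketch, assuming utils/nitro_utils.h and its Drogon dependencies are on the include path:

#include <cassert>
#include <string>

#include "utils/nitro_utils.h"

int main() {
  std::string s = "  Hello ";
  nitro_utils::ltrim(s);
  // Only leading whitespace is removed; the trailing space is untouched.
  assert(s == "Hello ");
  return 0;
}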
