This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit e6c65b1

sangjanai authored and vansangpfiev committed

bug: trim the leading space

1 parent 9c3be5c · commit e6c65b1

File tree

9 files changed: +91 −26 lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -94,4 +94,4 @@ target_sources(${PROJECT_NAME} PRIVATE ${CTL_SRC} ${COMMON_SRC} ${CONTEXT_SRC})
 # ${FILTER_SRC} ${PLUGIN_SRC} ${MODEL_SRC})
 # ##############################################################################
 # uncomment the following line for dynamically loading views set_property(TARGET
-# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)
+# ${PROJECT_NAME} PROPERTY ENABLE_EXPORTS ON)

controllers/llamaCPP.cc

Lines changed: 37 additions & 19 deletions

@@ -3,8 +3,8 @@
 #include <fstream>
 #include <iostream>
 #include "log.h"
-#include "utils/nitro_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/nitro_utils.h"
 
 // External
 #include "common.h"

@@ -210,7 +210,8 @@ void llamaCPP::InferenceImpl(
 
   // Passing load value
   data["repeat_last_n"] = this->repeat_last_n;
-  LOG_INFO_REQUEST(request_id) << "Stop words:" << completion.stop.toStyledString();
+  LOG_INFO_REQUEST(request_id)
+      << "Stop words:" << completion.stop.toStyledString();
 
   data["stream"] = completion.stream;
   data["n_predict"] = completion.max_tokens;

@@ -269,7 +270,8 @@ void llamaCPP::InferenceImpl(
           auto image_url = content_piece["image_url"]["url"].asString();
           std::string base64_image_data;
           if (image_url.find("http") != std::string::npos) {
-            LOG_INFO_REQUEST(request_id) << "Remote image detected but not supported yet";
+            LOG_INFO_REQUEST(request_id)
+                << "Remote image detected but not supported yet";
           } else if (image_url.find("data:image") != std::string::npos) {
             LOG_INFO_REQUEST(request_id) << "Base64 image detected";
             base64_image_data = nitro_utils::extractBase64(image_url);

@@ -330,29 +332,34 @@ void llamaCPP::InferenceImpl(
   if (is_streamed) {
     LOG_INFO_REQUEST(request_id) << "Streamed, waiting for respone";
     auto state = create_inference_state(this);
+    bool is_first_token = true;
     auto chunked_content_provider =
-        [state, data, request_id](char* pBuffer, std::size_t nBuffSize) -> std::size_t {
+        [state, data, request_id, &is_first_token](
+            char* pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (state->inference_status == PENDING) {
         state->inference_status = RUNNING;
       } else if (state->inference_status == FINISHED) {
         return 0;
       }
 
       if (!pBuffer) {
-        LOG_WARN_REQUEST(request_id) "Connection closed or buffer is null. Reset context";
+        LOG_WARN_REQUEST(request_id)
+        "Connection closed or buffer is null. Reset context";
         state->inference_status = FINISHED;
         return 0;
       }
 
       if (state->inference_status == EOS) {
         LOG_INFO_REQUEST(request_id) << "End of result";
+        is_first_token = true;
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_", "",
                                "stop") +
             "\n\n" + "data: [DONE]" + "\n\n";
 
-        LOG_VERBOSE("data stream", {{"request_id": request_id}, {"to_send", str}});
+        LOG_VERBOSE("data stream",
+                    {{"request_id": request_id}, {"to_send", str}});
         std::size_t nRead = std::min(str.size(), nBuffSize);
         memcpy(pBuffer, str.data(), nRead);
         state->inference_status = FINISHED;

@@ -361,7 +368,13 @@ void llamaCPP::InferenceImpl(
 
       task_result result = state->instance->llama.next_result(state->task_id);
       if (!result.error) {
-        const std::string to_send = result.result_json["content"];
+        std::string to_send = result.result_json["content"];
+
+        // trim the leading space if it is the first token
+        if (std::exchange(is_first_token, false)) {
+          nitro_utils::ltrim(to_send);
+        }
+
         const std::string str =
             "data: " +
             create_return_json(nitro_utils::generate_random_string(20), "_",

@@ -412,7 +425,8 @@ void llamaCPP::InferenceImpl(
         retries += 1;
       }
       if (state->inference_status != RUNNING)
-        LOG_INFO_REQUEST(request_id) << "Wait for task to be released:" << state->task_id;
+        LOG_INFO_REQUEST(request_id)
+            << "Wait for task to be released:" << state->task_id;
       std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
     LOG_INFO_REQUEST(request_id) << "Task completed, release it";

@@ -431,10 +445,12 @@ void llamaCPP::InferenceImpl(
     if (!result.error && result.stop) {
       int prompt_tokens = result.result_json["tokens_evaluated"];
       int predicted_tokens = result.result_json["tokens_predicted"];
-      std::string full_return =
-          create_full_return_json(nitro_utils::generate_random_string(20),
-                                  "_", result.result_json["content"], "_",
-                                  prompt_tokens, predicted_tokens);
+      std::string to_send = result.result_json["content"];
+      nitro_utils::ltrim(to_send);
+      std::string full_return = create_full_return_json(
+          nitro_utils::generate_random_string(20), "_", to_send, "_",
+          prompt_tokens, predicted_tokens);
+
       resp->setBody(full_return);
     } else {
       respData["message"] = "Internal error during inference";

@@ -468,7 +484,8 @@ void llamaCPP::EmbeddingImpl(
   // Queue embedding task
   auto state = create_inference_state(this);
 
-  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback, request_id]() {
+  state->instance->queue->runTaskInQueue([this, state, jsonBody, callback,
+                                          request_id]() {
     Json::Value responseData(Json::arrayValue);
 
     if (jsonBody->isMember("input")) {

@@ -542,7 +559,7 @@ void llamaCPP::ModelStatus(
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
   callback(resp);
   LOG_INFO << "Model status responded";
-  }
+  }
 }
 
 void llamaCPP::LoadModel(

@@ -552,10 +569,12 @@ void llamaCPP::LoadModel(
   if (!nitro_utils::isAVX2Supported() && ggml_cpu_has_avx2()) {
     LOG_ERROR << "AVX2 is not supported by your processor";
     Json::Value jsonResp;
-    jsonResp["message"] = "AVX2 is not supported by your processor, please download and replace the correct Nitro asset version";
+    jsonResp["message"] =
+        "AVX2 is not supported by your processor, please download and replace "
+        "the correct Nitro asset version";
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    callback(resp);
     return;
   }
 

@@ -589,10 +608,8 @@ void llamaCPP::LoadModel(
 
 bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   gpt_params params;
-  LOG_INFO << "Start loading model";
   // By default will setting based on number of handlers
   if (jsonBody) {
-    LOG_DEBUG << "Start parsing jsonBody";
     if (!jsonBody->operator[]("mmproj").isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
       params.mmproj = jsonBody->operator[]("mmproj").asString();

@@ -624,7 +641,8 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
     if (model_path.isNull()) {
       LOG_ERROR << "Missing model path in request";
     } else {
-      if (std::filesystem::exists(std::filesystem::path(model_path.asString()))) {
+      if (std::filesystem::exists(
+              std::filesystem::path(model_path.asString()))) {
        params.model = model_path.asString();
      } else {
        LOG_ERROR << "Could not find model in path " << model_path.asString();
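
The core of the fix is the `is_first_token` flag captured by reference in the streaming lambda: `std::exchange` stores `false` and returns the previous value, so the left-trim fires exactly once per streamed answer (and the flag is re-armed at EOS). A minimal standalone sketch of that pattern, with a hypothetical `ltrim` and a fixed token list standing in for the Nitro streaming callback:

#include <cctype>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for nitro_utils::ltrim.
static void ltrim(std::string& s) {
  std::size_t i = 0;
  while (i < s.size() && std::isspace(static_cast<unsigned char>(s[i]))) ++i;
  s.erase(0, i);
}

int main() {
  // Models often emit the first token with a leading space, e.g. " Hello".
  std::vector<std::string> tokens = {" Hello", ",", " world!"};
  bool is_first_token = true;
  for (std::string token : tokens) {
    // std::exchange stores false and returns the old value, so this
    // branch runs only for the very first emitted token.
    if (std::exchange(is_first_token, false)) {
      ltrim(token);
    }
    std::cout << token;
  }
  std::cout << '\n';  // prints: Hello, world!
}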

nitro_deps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -83,7 +83,7 @@ ExternalProject_Add(
 ExternalProject_Add(
   gtest
   GIT_REPOSITORY https://github.com/google/googletest
-  GIT_TAG v1.14.0
+  GIT_TAG v1.14.0
   CMAKE_ARGS
   -DCMAKE_BUILD_TYPE=release
   -DCMAKE_PREFIX_PATH=${THIRD_PARTY_INSTALL_PATH}

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -1,2 +1,2 @@
 
-add_subdirectory(models)
+add_subdirectory(components)

test/models/CMakeLists.txt renamed to test/components/CMakeLists.txt

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 file(GLOB SRCS *.cc)
-project(test-models)
+project(test-components)
 
 add_executable(${PROJECT_NAME} ${SRCS})
 

@@ -11,4 +11,4 @@ target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest:
 target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
 
 add_test(NAME ${PROJECT_NAME}
-  COMMAND ${PROJECT_NAME})
+  COMMAND ${PROJECT_NAME})

test/models/main.cc renamed to test/components/main.cc

Lines changed: 1 addition & 1 deletion

@@ -6,4 +6,4 @@ int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
   int ret = RUN_ALL_TESTS();
   return ret;
-}
+}
File renamed without changes.
Lines changed: 41 additions & 0 deletions

@@ -0,0 +1,41 @@
+#include "gtest/gtest.h"
+#include "utils/nitro_utils.h"
+
+class NitroUtilTest : public ::testing::Test {
+};
+
+TEST_F(NitroUtilTest, left_trim) {
+  {
+    std::string empty;
+    nitro_utils::ltrim(empty);
+    EXPECT_EQ(empty, "");
+  }
+
+  {
+    std::string s = "abc";
+    std::string expected = "abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = " abc";
+    std::string expected = "abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = "1 abc 2 ";
+    std::string expected = "1 abc 2 ";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+
+  {
+    std::string s = " |abc";
+    std::string expected = "|abc";
+    nitro_utils::ltrim(s);
+    EXPECT_EQ(s, expected);
+  }
+}
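
The committed cases cover empty, already-trimmed, left-padded, and internally-spaced strings. One case they leave out is an all-whitespace input, which `ltrim` reduces to the empty string because `std::find_if` reaches `end()` and the erase removes everything. A hypothetical extra case in the same style, reusing the fixture above:

// Not part of the commit: sketch of an additional edge case.
TEST_F(NitroUtilTest, left_trim_all_whitespace) {
  std::string s = " \t\n ";
  nitro_utils::ltrim(s);
  EXPECT_EQ(s, "");  // every character is whitespace, so all are erased
}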

utils/nitro_utils.h

Lines changed: 7 additions & 1 deletion

@@ -165,7 +165,7 @@ inline std::string generate_random_string(std::size_t length) {
   std::random_device rd;
   std::mt19937 generator(rd());
 
-  std::uniform_int_distribution<> distribution(0, characters.size() - 1);
+  std::uniform_int_distribution<> distribution(0, static_cast<int>(characters.size()) - 1);
 
   std::string random_string(length, '\0');
   std::generate_n(random_string.begin(), length,

@@ -276,4 +276,10 @@ inline drogon::HttpResponsePtr nitroStreamResponse(
   return resp;
 }
 
+inline void ltrim(std::string& s) {
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+            return !std::isspace(ch);
+          }));
+};
+
 } // namespace nitro_utils
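
The helper erases from the start of the string up to the first non-space character; taking the lambda parameter as `unsigned char` before calling `std::isspace` avoids undefined behavior when `char` is signed and holds negative values (e.g. bytes of UTF-8 text). A self-contained sketch of the same idiom, under the assumption that it matches the header's surrounding includes:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

// Same erase/find_if idiom as the nitro_utils::ltrim added above.
inline void ltrim(std::string& s) {
  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
            return !std::isspace(ch);  // stop at the first non-space byte
          }));
}

int main() {
  std::string s = "   leading spaces gone";
  ltrim(s);
  std::cout << '[' << s << "]\n";  // prints: [leading spaces gone]
}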
