diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index da1cc6554..41ce65e2f 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -359,7 +359,8 @@ void llamaCPP::chatCompletion(
     while (state->instance->single_queue_is_busy) {
       LOG_INFO << "Waiting for task to be released status:"
                << state->instance->single_queue_is_busy;
-      std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Waiting in 500 miliseconds step
+      std::this_thread::sleep_for(std::chrono::milliseconds(
+          500)); // Waiting in 500 milliseconds steps
     }
   }
   std::string str = "\n\n";
@@ -476,6 +477,9 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     params.grp_attn_w = jsonBody["grp_attn_w"].asInt();
   }

+  if (!jsonBody["mlock"].isNull()) {
+    params.use_mlock = jsonBody["mlock"].asBool();
+  }
   params.model = jsonBody["llama_model_path"].asString();
   params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
   params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
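
Not part of the patch: a minimal sketch of the kind of JSON body `loadModelImpl` can now parse, built with jsoncpp as the controller does. The key names (`llama_model_path`, `ngl`, `ctx_len`, and the new `mlock` flag) are taken from the diff above; the model path value is a hypothetical example, and the numeric values simply mirror the `jsonBody.get(...)` fallbacks in the code.

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  // Build the same kind of body loadModelImpl parses; key names come from the
  // diff above, the path value is a hypothetical example.
  Json::Value body;
  body["llama_model_path"] = "/models/example-7b.Q4_K_M.gguf"; // hypothetical path
  body["ngl"] = 100;      // GPU layers; the code defaults to 100 when absent
  body["ctx_len"] = 2048; // context length; the code defaults to 2048 when absent
  body["mlock"] = true;   // new flag mapped to params.use_mlock by this patch

  Json::StreamWriterBuilder writer;
  std::cout << Json::writeString(writer, body) << std::endl;
  return 0;
}
```

Because the new block is guarded by `isNull()`, clients that omit `mlock` keep whatever default `params.use_mlock` already carries, so existing requests are unaffected.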