From f999fd64ab9ab921da30ec1d7613b1e724e6b3c4 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 4 Dec 2023 22:48:52 +0700
Subject: [PATCH 1/3] POC file system

---
 controllers/llamaCPP.cc | 88 ++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 41 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index eaff6dd17..a7121e18d 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -359,21 +359,7 @@ void llamaCPP::modelStatus(
   return;
 }

-void llamaCPP::loadModel(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-
-  if (model_loaded) {
-    LOG_INFO << "model loaded";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Model already loaded";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k409Conflict);
-    callback(resp);
-    return;
-  }
-
-  const auto &jsonBody = req->getJsonObject();
+bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {

   gpt_params params;

@@ -381,25 +367,24 @@
   int drogon_thread = drogon::app().getThreadNum();
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
-    params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
-    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
-    params.embedding = (*jsonBody).get("embedding", true).asBool();
+    params.model = jsonBody["llama_model_path"].asString();
+    params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
+    params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
+    params.embedding = jsonBody.get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    params.n_batch = (*jsonBody).get("n_batch", 512).asInt();
-    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
+    params.n_batch = jsonBody.get("n_batch", 512).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
     params.n_threads =
-        (*jsonBody)
-            .get("cpu_threads", std::thread::hardware_concurrency())
+        jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();
-    params.cont_batching = (*jsonBody).get("cont_batching", false).asBool();
+    params.cont_batching = jsonBody.get("cont_batching", false).asBool();

-    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
-    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
+    this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
-        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
-    this->pre_prompt = (*jsonBody).get("pre_prompt", "").asString();
-    this->repeat_last_n = (*jsonBody).get("repeat_last_n", 32).asInt();
+        jsonBody.get("system_prompt", "ASSISTANT's RULE: ").asString();
+    this->pre_prompt = jsonBody.get("pre_prompt", "").asString();
+    this->repeat_last_n = jsonBody.get("repeat_last_n", 32).asInt();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
@@ -422,25 +407,46 @@

   // load the model
   if (!llama.load_model(params)) {
-    LOG_ERROR << "Error loading the model will exit the program";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Failed to load model";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    LOG_ERROR << "Error loading the model";
+    return false; // Indicate failure
   }
   llama.initialize();
-
-  Json::Value jsonResp;
-  jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
-  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
   warmupModel();
+  return true;
+}

-  callback(resp);
+void llamaCPP::loadModel(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+
+  if (model_loaded) {
+    LOG_INFO << "model loaded";
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model already loaded";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k409Conflict);
+    callback(resp);
+    return;
+  }
+
+  const auto &jsonBody = req->getJsonObject();
+  if (!loadModelImpl(*jsonBody)) {
+    // Error occurred during model loading
+    Json::Value jsonResp;
+    jsonResp["message"] = "Failed to load model";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k500InternalServerError);
+    callback(resp);
+  } else {
+    // Model loaded successfully
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model loaded successfully";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    callback(resp);
+  }
 }

 void llamaCPP::backgroundTask() {

From 40e37a26b0c4d0a8e5ebc0b2b44570614922b657 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 4 Dec 2023 22:49:05 +0700
Subject: [PATCH 2/3] POC file system

---
 controllers/llamaCPP.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index b470e79ae..ec0e7a1c0 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1,3 +1,5 @@
+#include <iostream>
+#include <string>
 #if defined(_WIN32)
 #define NOMINMAX
 #endif
@@ -2117,6 +2119,25 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
     // log_disable(); // Disable the log to file feature, reduce bloat for
     // target
     // system ()
+    std::vector<std::string> llama_models =
+        nitro_utils::listFilesInDir(nitro_utils::models_folder);
+    std::string model_index;
+    if (llama_models.size() > 0) {
+      LOG_INFO << "Found models folder, here are the llama models you have:";
+      int index_val = 0;
+      for (auto llama_model : llama_models) {
+        LOG_INFO << "index: " << index_val++ << "| model: " << llama_model;
+        std::cout
+            << "Please type the index of the model you want to load here >> ";
+        std::cin >> model_index;
+        Json::Value jsonBody;
+        jsonBody["llama_model_path"] = nitro_utils::models_folder + "/" +
+                                       llama_models[std::stoi(model_index)];
+        loadModelImpl(jsonBody);
+      }
+    } else {
+      LOG_INFO << "Not found models folder, start server as usual";
+    }
   }

   METHOD_LIST_BEGIN
@@ -2145,6 +2166,8 @@
   void modelStatus(const HttpRequestPtr &req,
                    std::function<void(const HttpResponsePtr &)> &&callback);

+  bool loadModelImpl(const Json::Value &jsonBody);
+
   void warmupModel();

   void backgroundTask();

From 47f11100dfc04d80d2a980ea6d6103c48d0c401d Mon Sep 17 00:00:00 2001
From: tikikun
Date: Mon, 4 Dec 2023 22:49:18 +0700
Subject: [PATCH 3/3] add file system utils

---
 utils/nitro_utils.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/utils/nitro_utils.h b/utils/nitro_utils.h
index 97f463cb6..0c98a7108 100644
--- a/utils/nitro_utils.h
+++ b/utils/nitro_utils.h
@@ -6,9 +6,51 @@
 #include
 #include
 #include
+// Include platform-specific headers
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dirent.h>
+#endif

 namespace nitro_utils {

+inline std::string models_folder = "./models";
+
+inline std::vector<std::string> listFilesInDir(const std::string &path) {
+  std::vector<std::string> files;
+
+#ifdef _WIN32
+  // Windows-specific code
+  WIN32_FIND_DATA findFileData;
+  HANDLE hFind = FindFirstFile((path + "\\*").c_str(), &findFileData);
+
+  if (hFind != INVALID_HANDLE_VALUE) {
+    do {
+      if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+        files.push_back(findFileData.cFileName);
+      }
+    } while (FindNextFile(hFind, &findFileData) != 0);
+    FindClose(hFind);
+  }
+#else
+  // POSIX-specific code (Linux, Unix, MacOS)
+  DIR *dir;
+  struct dirent *ent;
+
+  if ((dir = opendir(path.c_str())) != NULL) {
+    while ((ent = readdir(dir)) != NULL) {
+      if (ent->d_type == DT_REG) { // Check if it's a regular file
+        files.push_back(ent->d_name);
+      }
+    }
+    closedir(dir);
+  }
+#endif
+
+  return files;
+}
+
 inline std::string rtrim(const std::string &str) {
   size_t end = str.find_last_not_of("\n\t ");
   return (end == std::string::npos) ? "" : str.substr(0, end + 1);