From f606e66f07c07cdfb87b6b2bbb181b482f4371bf Mon Sep 17 00:00:00 2001 From: Thuandz Date: Wed, 18 Sep 2024 22:49:17 +0700 Subject: [PATCH 01/10] add model import command --- engine/commands/model_import_cmd.cc | 52 +++++++++++++++++++++++ engine/commands/model_import_cmd.h | 16 +++++++ engine/controllers/command_line_parser.cc | 17 +++++++- 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 engine/commands/model_import_cmd.cc create mode 100644 engine/commands/model_import_cmd.h diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc new file mode 100644 index 000000000..d43d8ddf4 --- /dev/null +++ b/engine/commands/model_import_cmd.cc @@ -0,0 +1,52 @@ +#include "model_import_cmd.h" +#include +#include +#include +#include "config/gguf_parser.h" +#include "config/yaml_config.h" +#include "trantor/utils/Logger.h" +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" +#include "utils/modellist_utils.h" + +namespace commands { + +ModelImportCmd::ModelImportCmd(std::string model_handle, std::string model_path) + : model_handle_(std::move(model_handle)), + model_path_(std::move(model_path)) {} + +void ModelImportCmd::Exec() { + config::GGUFHandler gguf_handler; + config::YamlHandler yaml_handler; + modellist_utils::ModelListUtils modellist_utils_obj; + + std::string model_yaml_path = (file_manager_utils::GetModelsContainerPath() / + std::filesystem::path("imported") / + std::filesystem::path(model_handle_ + ".yml")) + .string(); + modellist_utils::ModelEntry model_entry{ + model_handle_, "local", "imported", + model_yaml_path, model_handle_, modellist_utils::ModelStatus::READY}; + try { + std::filesystem::create_directories( + std::filesystem::path(model_yaml_path).parent_path()); + gguf_handler.Parse(model_path_); + config::ModelConfig model_config = gguf_handler.GetModelConfig(); + model_config.files.push_back(model_path_); + yaml_handler.UpdateModelConfig(model_config); + + if(modellist_utils_obj.AddModelEntry(model_entry)){ + yaml_handler.WriteYamlFile(model_yaml_path); + CLI_LOG("Model is imported successfully!"); + } + else{ + CLI_LOG("Fail to import model, model_id '"+model_handle_+"' already exists!" ); + } + + } catch (const std::exception& e) { + std::remove(model_yaml_path.c_str()); + CTL_ERR("Error importing model '" << model_path_ << "' with model_id '" + << model_handle_ << "': " << e.what()); + } +} +} // namespace commands \ No newline at end of file diff --git a/engine/commands/model_import_cmd.h b/engine/commands/model_import_cmd.h new file mode 100644 index 000000000..b1441a281 --- /dev/null +++ b/engine/commands/model_import_cmd.h @@ -0,0 +1,16 @@ +#pragma once + +#include // For std::isnan +#include +namespace commands { + +class ModelImportCmd { + public: + ModelImportCmd(std::string model_handle, std::string model_path); + void Exec(); + + private: + std::string model_handle_; + std::string model_path_; +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 9c4b5713f..06751ae5f 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -8,6 +8,7 @@ #include "commands/engine_uninstall_cmd.h" #include "commands/model_del_cmd.h" #include "commands/model_get_cmd.h" +#include "commands/model_import_cmd.h" #include "commands/model_list_cmd.h" #include "commands/model_pull_cmd.h" #include "commands/model_start_cmd.h" @@ -155,6 +156,20 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto model_update_cmd = models_cmd->add_subcommand("update", "Update configuration of a model"); + std::string model_path; + auto model_import_cmd = models_cmd->add_subcommand( + "import", "Import a gguf model from local file"); + model_import_cmd->add_option("--model_id", model_id, ""); + model_import_cmd->require_option(); + model_import_cmd->add_option("--model_path", model_path, + "Absolute path to .gguf model, the path should " + "include the gguf file name"); + model_import_cmd->require_option(); + model_import_cmd->callback([&model_id,&model_path]() { + commands::ModelImportCmd command(model_id, model_path); + command.Exec(); + }); + // Default version is latest std::string version{"latest"}; // engines group commands @@ -238,7 +253,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto ps_cmd = app_.add_subcommand("ps", "Show running models and their status"); ps_cmd->group(kSystemGroup); - + CLI11_PARSE(app_, argc, argv); if (argc == 1) { CLI_LOG(app_.help()); From cc4200b9dc8e1026cd153de2d45e0dfbf378506b Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 08:15:44 +0700 Subject: [PATCH 02/10] Add name to model.yml --- engine/commands/model_import_cmd.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index d43d8ddf4..07079fc3b 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -33,6 +33,7 @@ void ModelImportCmd::Exec() { gguf_handler.Parse(model_path_); config::ModelConfig model_config = gguf_handler.GetModelConfig(); model_config.files.push_back(model_path_); + model_config.name = model_handle_; yaml_handler.UpdateModelConfig(model_config); if(modellist_utils_obj.AddModelEntry(model_entry)){ From f9af68042eb1b2931ef8ee52af2f0e492339469b Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 09:47:48 +0700 Subject: [PATCH 03/10] add e2e test --- engine/commands/model_import_cmd.cc | 19 ++++++++++--------- engine/e2e-test/main.py | 1 + engine/e2e-test/test_cli_model_import.py | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 engine/e2e-test/test_cli_model_import.py diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index 07079fc3b..dfc9997f8 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -36,18 +36,19 @@ void ModelImportCmd::Exec() { model_config.name = model_handle_; yaml_handler.UpdateModelConfig(model_config); - if(modellist_utils_obj.AddModelEntry(model_entry)){ - yaml_handler.WriteYamlFile(model_yaml_path); - CLI_LOG("Model is imported successfully!"); + if (modellist_utils_obj.AddModelEntry(model_entry)) { + yaml_handler.WriteYamlFile(model_yaml_path); + CLI_LOG("Model is imported successfully!"); + } else { + CLI_LOG("Fail to import model, model_id '" + model_handle_ + + "' already exists!"); } - else{ - CLI_LOG("Fail to import model, model_id '"+model_handle_+"' already exists!" ); - } - + } catch (const std::exception& e) { std::remove(model_yaml_path.c_str()); - CTL_ERR("Error importing model '" << model_path_ << "' with model_id '" - << model_handle_ << "': " << e.what()); + throw std::runtime_error("Error importing model '" + model_path_ + + "' with model_id '" + model_handle_ + + "': " + e.what()); } } } // namespace commands \ No newline at end of file diff --git a/engine/e2e-test/main.py b/engine/e2e-test/main.py index 1df424e65..f5a1c65ff 100644 --- a/engine/e2e-test/main.py +++ b/engine/e2e-test/main.py @@ -9,6 +9,7 @@ from test_cli_server_start import TestCliServerStart from test_cortex_update import TestCortexUpdate from test_create_log_folder import TestCreateLogFolder +from test_cli_model_import import TestCliModelImport if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/engine/e2e-test/test_cli_model_import.py b/engine/e2e-test/test_cli_model_import.py new file mode 100644 index 000000000..1f54ae511 --- /dev/null +++ b/engine/e2e-test/test_cli_model_import.py @@ -0,0 +1,14 @@ +import pytest +from test_runner import run + +class TestCliModelImport: + + @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.") + def test_model_import_should_be_success(self): + + exit_code, output, error = run( + "Pull model", ["models", "import", "--model_id","test_model","--model_path","/path/to/local/gguf"], + timeout=None + ) + assert exit_code == 0, f"Model import failed failed with error: {error}" + # TODO: skip this test. since download model is taking too long \ No newline at end of file From c8bd76e57e1b974d3ac968b0b07b6aa4d90ac439 Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 12:08:54 +0700 Subject: [PATCH 04/10] Add API for import model --- engine/controllers/models.cc | 78 ++++++++++++++++++++++++++++++++++-- engine/controllers/models.h | 3 ++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 1d3157fcb..4bfbded61 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -5,11 +5,11 @@ #include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" #include "utils/model_callback_utils.h" +#include "utils/modellist_utils.h" - void - Models::PullModel( - const HttpRequestPtr& req, - std::function&& callback) const { +void Models::PullModel( + const HttpRequestPtr& req, + std::function&& callback) const { if (!http_util::HasFieldInReq(req, callback, "modelId")) { return; } @@ -192,4 +192,74 @@ void Models::DeleteModel(const HttpRequestPtr& req, resp->setStatusCode(k404NotFound); callback(resp); } +} + +void Models::ImportModel( + const HttpRequestPtr& req, + std::function&& callback) const { + if (!http_util::HasFieldInReq(req, callback, "modelId") || + !http_util::HasFieldInReq(req, callback, "modelPath")) { + return; + } + auto modelHandle = (*(req->getJsonObject())).get("modelId", "").asString(); + auto modelPath = (*(req->getJsonObject())).get("modelPath", "").asString(); + config::GGUFHandler gguf_handler; + config::YamlHandler yaml_handler; + modellist_utils::ModelListUtils modellist_utils_obj; + + std::string model_yaml_path = (file_manager_utils::GetModelsContainerPath() / + std::filesystem::path("imported") / + std::filesystem::path(modelHandle + ".yml")) + .string(); + modellist_utils::ModelEntry model_entry{ + modelHandle, "local", "imported", + model_yaml_path, modelHandle, modellist_utils::ModelStatus::READY}; + try { + std::filesystem::create_directories( + std::filesystem::path(model_yaml_path).parent_path()); + gguf_handler.Parse(modelPath); + config::ModelConfig model_config = gguf_handler.GetModelConfig(); + model_config.files.push_back(modelPath); + model_config.name = modelHandle; + yaml_handler.UpdateModelConfig(model_config); + + if (modellist_utils_obj.AddModelEntry(model_entry)) { + yaml_handler.WriteYamlFile(model_yaml_path); + std::string success_message = "Model is imported successfully!"; + LOG_INFO << success_message; + Json::Value ret; + ret["result"] = "OK"; + ret["modelHandle"] = modelHandle; + ret["message"] = success_message; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + + } else { + std::string error_message = "Fail to import model, model_id '" + + modelHandle + "' already exists!"; + LOG_ERROR << error_message; + Json::Value ret; + ret["result"] = "Import failed!"; + ret["modelHandle"] = modelHandle; + ret["message"] = error_message; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } + + } catch (const std::exception& e) { + std::remove(model_yaml_path.c_str()); + std::string error_message = "Error importing model '" + modelPath + + "' with model_id '" + modelHandle + + "': " + e.what(); + LOG_ERROR << error_message; + Json::Value ret; + ret["result"] = "Import failed!"; + ret["modelHandle"] = modelHandle; + ret["message"] = error_message; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } } \ No newline at end of file diff --git a/engine/controllers/models.h b/engine/controllers/models.h index 907ab3917..2fd7c9b2d 100644 --- a/engine/controllers/models.h +++ b/engine/controllers/models.h @@ -15,6 +15,7 @@ class Models : public drogon::HttpController { METHOD_ADD(Models::PullModel, "/pull", Post); METHOD_ADD(Models::ListModel, "/list", Get); METHOD_ADD(Models::GetModel, "/get", Post); + METHOD_ADD(Models::ImportModel, "/import", Post); METHOD_ADD(Models::DeleteModel, "/{1}", Delete); METHOD_LIST_END @@ -24,6 +25,8 @@ class Models : public drogon::HttpController { std::function&& callback) const; void GetModel(const HttpRequestPtr& req, std::function&& callback) const; + void ImportModel(const HttpRequestPtr& req, + std::function&& callback) const; void DeleteModel(const HttpRequestPtr& req, std::function&& callback, const std::string& model_id) const; From 4a6d08db5335c9331cbd7c694070efa59b7de091 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 <35255081+nguyenhoangthuan99@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:54:18 +0700 Subject: [PATCH 05/10] Update model_import_cmd.cc --- engine/commands/model_import_cmd.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index dfc9997f8..e4e8878ec 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -33,7 +33,7 @@ void ModelImportCmd::Exec() { gguf_handler.Parse(model_path_); config::ModelConfig model_config = gguf_handler.GetModelConfig(); model_config.files.push_back(model_path_); - model_config.name = model_handle_; + model_config.model = model_handle_; yaml_handler.UpdateModelConfig(model_config); if (modellist_utils_obj.AddModelEntry(model_entry)) { @@ -51,4 +51,4 @@ void ModelImportCmd::Exec() { "': " + e.what()); } } -} // namespace commands \ No newline at end of file +} // namespace commands From 009139d9f0addf89814dc3221cf81527bfbc964a Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 16:35:17 +0700 Subject: [PATCH 06/10] Fix comment --- engine/commands/model_import_cmd.cc | 7 +++---- engine/controllers/command_line_parser.cc | 3 +-- engine/controllers/models.cc | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index dfc9997f8..d0125a03f 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -31,7 +31,7 @@ void ModelImportCmd::Exec() { std::filesystem::create_directories( std::filesystem::path(model_yaml_path).parent_path()); gguf_handler.Parse(model_path_); - config::ModelConfig model_config = gguf_handler.GetModelConfig(); + auto model_config = gguf_handler.GetModelConfig(); model_config.files.push_back(model_path_); model_config.name = model_handle_; yaml_handler.UpdateModelConfig(model_config); @@ -46,9 +46,8 @@ void ModelImportCmd::Exec() { } catch (const std::exception& e) { std::remove(model_yaml_path.c_str()); - throw std::runtime_error("Error importing model '" + model_path_ + - "' with model_id '" + model_handle_ + - "': " + e.what()); + CLI_LOG("Error importing model path '" + model_path_ + "' with model_id '" + + model_handle_ + "': " + e.what()); } } } // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 0043180db..72015bdfd 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -158,11 +158,10 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { auto model_import_cmd = models_cmd->add_subcommand( "import", "Import a gguf model from local file"); model_import_cmd->add_option("--model_id", model_id, ""); - model_import_cmd->require_option(); model_import_cmd->add_option("--model_path", model_path, "Absolute path to .gguf model, the path should " "include the gguf file name"); - model_import_cmd->require_option(); + model_import_cmd->require_option(2); model_import_cmd->callback([&model_id,&model_path]() { commands::ModelImportCmd command(model_id, model_path); command.Exec(); diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 4bfbded61..08f2c0a20 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -250,7 +250,7 @@ void Models::ImportModel( } catch (const std::exception& e) { std::remove(model_yaml_path.c_str()); - std::string error_message = "Error importing model '" + modelPath + + std::string error_message = "Error importing model path '" + modelPath + "' with model_id '" + modelHandle + "': " + e.what(); LOG_ERROR << error_message; From a70ae57f821bc4a26f49dfeb09611f9f8beba996 Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 19:37:14 +0700 Subject: [PATCH 07/10] add get/list model api --- engine/commands/model_get_cmd.cc | 253 +++++++++++++-------------- engine/controllers/models.cc | 284 ++++++++++++++++++++----------- engine/utils/modellist_utils.h | 4 +- 3 files changed, 310 insertions(+), 231 deletions(-) diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc index d4f19ffa8..39a903245 100644 --- a/engine/commands/model_get_cmd.cc +++ b/engine/commands/model_get_cmd.cc @@ -1,147 +1,140 @@ #include "model_get_cmd.h" #include +#include #include #include #include "cmd_info.h" #include "config/yaml_config.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" +#include "utils/modellist_utils.h" namespace commands { void ModelGetCmd::Exec(const std::string& model_handle) { - auto models_path = file_manager_utils::GetModelsContainerPath(); - if (std::filesystem::exists(models_path) && - std::filesystem::is_directory(models_path)) { - CmdInfo ci(model_handle); - std::string model_file = - ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch; - bool found_model = false; - // Iterate through directory - for (const auto& entry : std::filesystem::directory_iterator(models_path)) { - - if (entry.is_regular_file() && entry.path().stem() == model_file && - entry.path().extension() == ".yaml") { - try { - config::YamlHandler handler; - handler.ModelConfigFromFile(entry.path().string()); - const auto& model_config = handler.GetModelConfig(); - std::cout << "ModelConfig Details:\n"; - std::cout << "-------------------\n"; - - // Print non-null strings - if (!model_config.id.empty()) - std::cout << "id: " << model_config.id << "\n"; - if (!model_config.name.empty()) - std::cout << "name: " << model_config.name << "\n"; - if (!model_config.model.empty()) - std::cout << "model: " << model_config.model << "\n"; - if (!model_config.version.empty()) - std::cout << "version: " << model_config.version << "\n"; - - // Print non-empty vectors - if (!model_config.stop.empty()) { - std::cout << "stop: ["; - for (size_t i = 0; i < model_config.stop.size(); ++i) { - std::cout << model_config.stop[i]; - if (i < model_config.stop.size() - 1) - std::cout << ", "; - } - std::cout << "]\n"; - } - // Print valid numbers - if (!std::isnan(static_cast(model_config.top_p))) - std::cout << "top_p: " << model_config.top_p << "\n"; - if (!std::isnan(static_cast(model_config.temperature))) - std::cout << "temperature: " << model_config.temperature << "\n"; - if (!std::isnan(static_cast(model_config.frequency_penalty))) - std::cout << "frequency_penalty: " << model_config.frequency_penalty - << "\n"; - if (!std::isnan(static_cast(model_config.presence_penalty))) - std::cout << "presence_penalty: " << model_config.presence_penalty - << "\n"; - if (!std::isnan(static_cast(model_config.max_tokens))) - std::cout << "max_tokens: " << model_config.max_tokens << "\n"; - if (!std::isnan(static_cast(model_config.stream))) - - std::cout << "stream: " << std::boolalpha << model_config.stream - << "\n"; - if (!std::isnan(static_cast(model_config.ngl))) - std::cout << "ngl: " << model_config.ngl << "\n"; - if (!std::isnan(static_cast(model_config.ctx_len))) - std::cout << "ctx_len: " << model_config.ctx_len << "\n"; - - // Print non-null strings - if (!model_config.engine.empty()) - std::cout << "engine: " << model_config.engine << "\n"; - if (!model_config.prompt_template.empty()) - - std::cout << "prompt_template: " << model_config.prompt_template - << "\n"; - if (!model_config.system_template.empty()) - std::cout << "system_template: " << model_config.system_template - << "\n"; - if (!model_config.user_template.empty()) - std::cout << "user_template: " << model_config.user_template - << "\n"; - if (!model_config.ai_template.empty()) - std::cout << "ai_template: " << model_config.ai_template << "\n"; - if (!model_config.os.empty()) - std::cout << "os: " << model_config.os << "\n"; - if (!model_config.gpu_arch.empty()) - std::cout << "gpu_arch: " << model_config.gpu_arch << "\n"; - if (!model_config.quantization_method.empty()) - - std::cout << "quantization_method: " - << model_config.quantization_method << "\n"; - if (!model_config.precision.empty()) - std::cout << "precision: " << model_config.precision << "\n"; - - if (!std::isnan(static_cast(model_config.tp))) - std::cout << "tp: " << model_config.tp << "\n"; - - // Print non-null strings - if (!model_config.trtllm_version.empty()) - - std::cout << "trtllm_version: " << model_config.trtllm_version - << "\n"; - if (!std::isnan(static_cast(model_config.text_model))) - std::cout << "text_model: " << std::boolalpha - << model_config.text_model << "\n"; - - // Print non-empty vectors - if (!model_config.files.empty()) { - std::cout << "files: ["; - for (size_t i = 0; i < model_config.files.size(); ++i) { - std::cout << model_config.files[i]; - if (i < model_config.files.size() - 1) - std::cout << ", "; - } - std::cout << "]\n"; - } - - // Print valid size_t number - if (model_config.created != 0) - std::cout << "created: " << model_config.created << "\n"; - - if (!model_config.object.empty()) - std::cout << "object: " << model_config.object << "\n"; - if (!model_config.owned_by.empty()) - std::cout << "owned_by: " << model_config.owned_by << "\n"; - - found_model = true; - break; - } catch (const std::exception& e) { - CTL_ERR("Error reading yaml file '" << entry.path().string() - << "': " << e.what()); - } + modellist_utils::ModelListUtils modellist_handler; + config::YamlHandler yaml_handler; + try { + auto model_entry = modellist_handler.GetModelInfo(model_handle); + yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); + auto model_config = yaml_handler.GetModelConfig(); + + // Helper function to print comments + auto print_comment = [](const std::string& comment) { + std::cout << "\033[1;90m# " << comment << "\033[0m\n"; + }; + + print_comment("BEGIN GENERAL GGUF METADATA"); + + // Helper function to print key-value pairs with color + auto print_kv = [](const std::string& key, const auto& value, + const std::string& color = "\033[0m") { + std::cout << "\033[1;32m" << key << ":\033[0m " << color << value + << "\033[0m\n"; + }; + + // Helper function to print boolean values + auto print_bool = [&print_kv](const std::string& key, bool value) { + print_kv(key, value ? "true" : "false", "\033[0;35m"); + }; + + // Print non-empty strings + if (!model_config.id.empty()) + print_kv("id", model_config.id, "\033[0;33m"); + if (!model_config.name.empty()) + print_kv("name", model_config.name, "\033[0;33m"); + if (!model_config.model.empty()) + print_kv("model", model_config.model, "\033[0;33m"); + if (!model_config.version.empty()) + print_kv("version", model_config.version, "\033[0;33m"); + + // Print non-empty vectors + if (!model_config.files.empty()) { + std::cout << "\033[1;32mfiles:\033[0m\n"; + for (const auto& file : model_config.files) { + std::cout << " - \033[0;33m" << file << "\033[0m\n"; } } - if (!found_model) { - CLI_LOG("Model not found!"); + + print_comment("END GENERAL GGUF METADATA"); + print_comment("BEGIN INFERENCE PARAMETERS"); + print_comment("BEGIN REQUIRED"); + + if (!model_config.stop.empty()) { + std::cout << "\033[1;32mstop:\033[0m\n"; + for (const auto& stop : model_config.stop) { + std::cout << " - \033[0;33m" << stop << "\033[0m\n"; + } } - } else { - CLI_LOG("Model not found!"); + + print_comment("END REQUIRED"); + print_comment("BEGIN OPTIONAL"); + + // Print boolean values + print_bool("stream", model_config.stream); + + // Print float values with fixed precision + auto print_float = [&print_kv](const std::string& key, float value) { + if (!std::isnan(value)) { + std::ostringstream oss; + oss << std::fixed << std::setprecision(9) << value; + print_kv(key, oss.str(), "\033[0;34m"); + } + }; + + print_float("top_p", model_config.top_p); + print_float("temperature", model_config.temperature); + print_float("frequency_penalty", model_config.frequency_penalty); + print_float("presence_penalty", model_config.presence_penalty); + + // Print integer values + auto print_int = [&print_kv](const std::string& key, int value) { + if (value != 0) { // Assuming 0 is the default/unset value + print_kv(key, value, "\033[0;35m"); + } + }; + + print_int("max_tokens", static_cast(model_config.max_tokens)); + print_int("seed", model_config.seed); + print_float("dynatemp_range", model_config.dynatemp_range); + print_float("dynatemp_exponent", model_config.dynatemp_exponent); + print_int("top_k", model_config.top_k); + print_float("min_p", model_config.min_p); + print_int("tfs_z", model_config.tfs_z); + print_float("typ_p", model_config.typ_p); + print_int("repeat_last_n", model_config.repeat_last_n); + print_float("repeat_penalty", model_config.repeat_penalty); + print_bool("mirostat", model_config.mirostat); + print_float("mirostat_tau", model_config.mirostat_tau); + print_float("mirostat_eta", model_config.mirostat_eta); + print_bool("penalize_nl", model_config.penalize_nl); + print_bool("ignore_eos", model_config.ignore_eos); + print_int("n_probs", model_config.n_probs); + print_int("min_keep", model_config.min_keep); + + print_comment("END OPTIONAL"); + print_comment("END INFERENCE PARAMETERS"); + print_comment("BEGIN MODEL LOAD PARAMETERS"); + print_comment("BEGIN REQUIRED"); + + if (!model_config.engine.empty()) + print_kv("engine", model_config.engine, "\033[0;33m"); + if (!model_config.prompt_template.empty()) + print_kv("prompt_template", model_config.prompt_template, "\033[0;33m"); + + print_comment("END REQUIRED"); + print_comment("BEGIN OPTIONAL"); + + print_int("ctx_len", static_cast(model_config.ctx_len)); + print_int("ngl", static_cast(model_config.ngl)); + + print_comment("END OPTIONAL"); + print_comment("END MODEL LOAD PARAMETERS"); + + } catch (const std::exception& e) { + CLI_LOG("Fail to get model information with ID '" + model_handle + + "': " + e.what()); } } -}; // namespace commands + +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 08f2c0a20..4013e7b9f 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -52,58 +52,108 @@ void Models::ListModel( Json::Value ret; ret["object"] = "list"; Json::Value data(Json::arrayValue); - auto models_path = file_manager_utils::GetModelsContainerPath(); - if (std::filesystem::exists(models_path) && - std::filesystem::is_directory(models_path)) { - // Iterate through directory - for (const auto& entry : std::filesystem::directory_iterator(models_path)) { - if (entry.is_regular_file() && entry.path().extension() == ".yaml") { - try { - config::YamlHandler handler; - handler.ModelConfigFromFile(entry.path().string()); - auto const& model_config = handler.GetModelConfig(); - Json::Value obj; - obj["name"] = model_config.name; - obj["model"] = model_config.model; - obj["version"] = model_config.version; - Json::Value stop_array(Json::arrayValue); - for (const std::string& stop : model_config.stop) - stop_array.append(stop); - obj["stop"] = stop_array; - obj["top_p"] = model_config.top_p; - obj["temperature"] = model_config.temperature; - obj["presence_penalty"] = model_config.presence_penalty; - obj["max_tokens"] = model_config.max_tokens; - obj["stream"] = model_config.stream; - obj["ngl"] = model_config.ngl; - obj["ctx_len"] = model_config.ctx_len; - obj["engine"] = model_config.engine; - obj["prompt_template"] = model_config.prompt_template; - - Json::Value files_array(Json::arrayValue); - for (const std::string& file : model_config.files) - files_array.append(file); - obj["files"] = files_array; - obj["id"] = model_config.id; - obj["created"] = static_cast(model_config.created); - obj["object"] = model_config.object; - obj["owned_by"] = model_config.owned_by; - if (model_config.engine == "cortex.tensorrt-llm") { - obj["trtllm_version"] = model_config.trtllm_version; - } - data.append(std::move(obj)); - } catch (const std::exception& e) { - LOG_ERROR << "Error reading yaml file '" << entry.path().string() - << "': " << e.what(); + + // Iterate through directory + + try { + modellist_utils::ModelListUtils modellist_handler; + config::YamlHandler yaml_handler; + + auto list_entry = modellist_handler.LoadModelList(); + + for (const auto& model_entry : list_entry) { + // auto model_entry = modellist_handler.GetModelInfo(model_handle); + try { + + yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); + auto model_config = yaml_handler.GetModelConfig(); + Json::Value obj; + obj["id"] = model_config.id; + obj["name"] = model_config.name; + obj["model"] = model_config.model; + obj["version"] = model_config.version; + + Json::Value stop_array(Json::arrayValue); + for (const std::string& stop : model_config.stop) + stop_array.append(stop); + obj["stop"] = stop_array; + + obj["stream"] = model_config.stream; + obj["top_p"] = model_config.top_p; + obj["temperature"] = model_config.temperature; + obj["frequency_penalty"] = model_config.frequency_penalty; + obj["presence_penalty"] = model_config.presence_penalty; + obj["max_tokens"] = static_cast(model_config.max_tokens); + + // New fields + obj["seed"] = model_config.seed; + obj["dynatemp_range"] = model_config.dynatemp_range; + obj["dynatemp_exponent"] = model_config.dynatemp_exponent; + obj["top_k"] = model_config.top_k; + obj["min_p"] = model_config.min_p; + obj["tfs_z"] = model_config.tfs_z; + obj["typ_p"] = model_config.typ_p; + obj["repeat_last_n"] = model_config.repeat_last_n; + obj["repeat_penalty"] = model_config.repeat_penalty; + obj["mirostat"] = model_config.mirostat; + obj["mirostat_tau"] = model_config.mirostat_tau; + obj["mirostat_eta"] = model_config.mirostat_eta; + obj["penalize_nl"] = model_config.penalize_nl; + obj["ignore_eos"] = model_config.ignore_eos; + obj["n_probs"] = model_config.n_probs; + obj["min_keep"] = model_config.min_keep; + + obj["ngl"] = model_config.ngl; + obj["ctx_len"] = static_cast(model_config.ctx_len); + obj["engine"] = model_config.engine; + obj["prompt_template"] = model_config.prompt_template; + obj["system_template"] = model_config.system_template; + obj["user_template"] = model_config.user_template; + obj["ai_template"] = model_config.ai_template; + + obj["os"] = model_config.os; + obj["gpu_arch"] = model_config.gpu_arch; + obj["quantization_method"] = model_config.quantization_method; + obj["precision"] = model_config.precision; + + Json::Value files_array(Json::arrayValue); + for (const std::string& file : model_config.files) + files_array.append(file); + obj["files"] = files_array; + + obj["created"] = static_cast(model_config.created); + obj["object"] = model_config.object; + obj["owned_by"] = model_config.owned_by; + obj["text_model"] = model_config.text_model; + + if (model_config.engine == "cortex.tensorrt-llm") { + obj["trtllm_version"] = model_config.trtllm_version; + obj["tp"] = model_config.tp; } + + data.append(std::move(obj)); + yaml_handler.Reset(); + } catch (const std::exception& e) { + LOG_ERROR << "Failed to load yaml file for model: " + << model_entry.path_to_model_yaml << ", error: " << e.what(); } } + ret["data"] = data; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } catch (const std::exception& e) { + std::string message = + "Fail to get list model information: " + std::string(e.what()); + LOG_ERROR << message; + ret["data"] = data; + ret["result"] = "Fail to get list model information"; + ret["message"] = message; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); } - ret["data"] = data; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); - resp->setStatusCode(k200OK); - callback(resp); } void Models::GetModel( @@ -117,59 +167,95 @@ void Models::GetModel( Json::Value ret; ret["object"] = "list"; Json::Value data(Json::arrayValue); - if (std::filesystem::exists(cortex_utils::models_folder) && - std::filesystem::is_directory(cortex_utils::models_folder)) { - // Iterate through directory - for (const auto& entry : - std::filesystem::directory_iterator(cortex_utils::models_folder)) { - if (entry.is_regular_file() && entry.path().extension() == ".yaml" && - entry.path().stem() == model_handle) { - try { - config::YamlHandler handler; - handler.ModelConfigFromFile(entry.path().string()); - auto const& model_config = handler.GetModelConfig(); - Json::Value obj; - obj["name"] = model_config.name; - obj["model"] = model_config.model; - obj["version"] = model_config.version; - Json::Value stop_array(Json::arrayValue); - for (const std::string& stop : model_config.stop) - stop_array.append(stop); - obj["stop"] = stop_array; - obj["top_p"] = model_config.top_p; - obj["temperature"] = model_config.temperature; - obj["presence_penalty"] = model_config.presence_penalty; - obj["max_tokens"] = model_config.max_tokens; - obj["stream"] = model_config.stream; - obj["ngl"] = model_config.ngl; - obj["ctx_len"] = model_config.ctx_len; - obj["engine"] = model_config.engine; - obj["prompt_template"] = model_config.prompt_template; - - Json::Value files_array(Json::arrayValue); - for (const std::string& file : model_config.files) - files_array.append(file); - obj["files"] = files_array; - obj["id"] = model_config.id; - obj["created"] = static_cast(model_config.created); - obj["object"] = model_config.object; - obj["owned_by"] = model_config.owned_by; - if (model_config.engine == "cortex.tensorrt-llm") { - obj["trtllm_version"] = model_config.trtllm_version; - } - data.append(std::move(obj)); - } catch (const std::exception& e) { - LOG_ERROR << "Error reading yaml file '" << entry.path().string() - << "': " << e.what(); - } - } + + try { + modellist_utils::ModelListUtils modellist_handler; + config::YamlHandler yaml_handler; + auto model_entry = modellist_handler.GetModelInfo(model_handle); + yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); + auto model_config = yaml_handler.GetModelConfig(); + + Json::Value obj; + obj["id"] = model_config.id; + obj["name"] = model_config.name; + obj["model"] = model_config.model; + obj["version"] = model_config.version; + + Json::Value stop_array(Json::arrayValue); + for (const std::string& stop : model_config.stop) + stop_array.append(stop); + obj["stop"] = stop_array; + + obj["stream"] = model_config.stream; + obj["top_p"] = model_config.top_p; + obj["temperature"] = model_config.temperature; + obj["frequency_penalty"] = model_config.frequency_penalty; + obj["presence_penalty"] = model_config.presence_penalty; + obj["max_tokens"] = static_cast(model_config.max_tokens); + + // New fields + obj["seed"] = model_config.seed; + obj["dynatemp_range"] = model_config.dynatemp_range; + obj["dynatemp_exponent"] = model_config.dynatemp_exponent; + obj["top_k"] = model_config.top_k; + obj["min_p"] = model_config.min_p; + obj["tfs_z"] = model_config.tfs_z; + obj["typ_p"] = model_config.typ_p; + obj["repeat_last_n"] = model_config.repeat_last_n; + obj["repeat_penalty"] = model_config.repeat_penalty; + obj["mirostat"] = model_config.mirostat; + obj["mirostat_tau"] = model_config.mirostat_tau; + obj["mirostat_eta"] = model_config.mirostat_eta; + obj["penalize_nl"] = model_config.penalize_nl; + obj["ignore_eos"] = model_config.ignore_eos; + obj["n_probs"] = model_config.n_probs; + obj["min_keep"] = model_config.min_keep; + + obj["ngl"] = model_config.ngl; + obj["ctx_len"] = static_cast(model_config.ctx_len); + obj["engine"] = model_config.engine; + obj["prompt_template"] = model_config.prompt_template; + obj["system_template"] = model_config.system_template; + obj["user_template"] = model_config.user_template; + obj["ai_template"] = model_config.ai_template; + + obj["os"] = model_config.os; + obj["gpu_arch"] = model_config.gpu_arch; + obj["quantization_method"] = model_config.quantization_method; + obj["precision"] = model_config.precision; + + Json::Value files_array(Json::arrayValue); + for (const std::string& file : model_config.files) + files_array.append(file); + obj["files"] = files_array; + + obj["created"] = static_cast(model_config.created); + obj["object"] = model_config.object; + obj["owned_by"] = model_config.owned_by; + obj["text_model"] = model_config.text_model; + + if (model_config.engine == "cortex.tensorrt-llm") { + obj["trtllm_version"] = model_config.trtllm_version; + obj["tp"] = model_config.tp; } + + data.append(std::move(obj)); + ret["data"] = data; + ret["result"] = "OK"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } catch (const std::exception& e) { + std::string message = "Fail to get model information with ID '" + + model_handle + "': " + e.what(); + LOG_ERROR << message; + ret["data"] = data; + ret["result"] = "Fail to get model information"; + ret["message"] = message; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); } - ret["data"] = data; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); - resp->setStatusCode(k200OK); - callback(resp); } void Models::DeleteModel(const HttpRequestPtr& req, diff --git a/engine/utils/modellist_utils.h b/engine/utils/modellist_utils.h index e8efab0d2..4a95a3661 100644 --- a/engine/utils/modellist_utils.h +++ b/engine/utils/modellist_utils.h @@ -22,14 +22,14 @@ class ModelListUtils { private: mutable std::mutex mutex_; // For thread safety - std::vector LoadModelList() const; - bool IsUnique(const std::vector& entries, + bool IsUnique(const std::vector& entries, const std::string& model_id, const std::string& model_alias) const; void SaveModelList(const std::vector& entries) const; public: static const std::string kModelListPath; + std::vector LoadModelList() const; ModelListUtils() = default; std::string GenerateShortenedAlias( const std::string& model_id, From 337b1998eaf55e18d57732156f139e492c3764c4 Mon Sep 17 00:00:00 2001 From: Thuandz Date: Thu, 19 Sep 2024 21:53:27 +0700 Subject: [PATCH 08/10] add model list command --- engine/commands/model_list_cmd.cc | 84 +++++++++++++++++-------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/engine/commands/model_list_cmd.cc b/engine/commands/model_list_cmd.cc index e0ca88bd3..6e3990eb6 100644 --- a/engine/commands/model_list_cmd.cc +++ b/engine/commands/model_list_cmd.cc @@ -6,51 +6,59 @@ #include "config/yaml_config.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" +#include "utils/modellist_utils.h" namespace commands { void ModelListCmd::Exec() { auto models_path = file_manager_utils::GetModelsContainerPath(); - if (std::filesystem::exists(models_path) && - std::filesystem::is_directory(models_path)) { - tabulate::Table table; + modellist_utils::ModelListUtils modellist_handler; + config::YamlHandler yaml_handler; + tabulate::Table table; - table.add_row({"(Index)", "ID", "engine", "version"}); - table.format().font_color(tabulate::Color::green); - int count = 0; - // Iterate through directory - for (const auto& entry : std::filesystem::directory_iterator(models_path)) { - if (entry.is_regular_file() && entry.path().extension() == ".yaml") { - try { - count += 1; - config::YamlHandler handler; - handler.ModelConfigFromFile(entry.path().string()); - const auto& model_config = handler.GetModelConfig(); - table.add_row({std::to_string(count), model_config.id, - model_config.engine, model_config.version}); - } catch (const std::exception& e) { - CTL_ERR("Error reading yaml file '" << entry.path().string() - << "': " << e.what()); - } + table.add_row({"(Index)", "ID", "model alias", "engine", "version"}); + table.format().font_color(tabulate::Color::green); + int count = 0; + // Iterate through directory + + try { + auto list_entry = modellist_handler.LoadModelList(); + for (const auto& model_entry : list_entry) { + // auto model_entry = modellist_handler.GetModelInfo(model_handle); + try { + count += 1; + yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); + auto model_config = yaml_handler.GetModelConfig(); + table.add_row({std::to_string(count), model_entry.model_id, + model_entry.model_alias, model_config.engine, + model_config.version}); + yaml_handler.Reset(); + } catch (const std::exception& e) { + CTL_ERR("Fail to get list model information: " + std::string(e.what())); } } - for (int i = 0; i < 4; i++) { - table[0][i] - .format() - .font_color(tabulate::Color::white) // Set font color - .font_style({tabulate::FontStyle::bold}) - .font_align(tabulate::FontAlign::center); - } - for (int i = 1; i <= count; i++) { - table[i][0] //index value - .format() - .font_color(tabulate::Color::white) // Set font color - .font_align(tabulate::FontAlign::center); - table[i][3] //version value - .format() - .font_align(tabulate::FontAlign::center); - } - std::cout << table << std::endl; + } catch (const std::exception& e) { + CTL_ERR("Fail to get list model information: " + std::string(e.what())); + } + + for (int i = 0; i < 5; i++) { + table[0][i] + .format() + .font_color(tabulate::Color::white) // Set font color + .font_style({tabulate::FontStyle::bold}) + .font_align(tabulate::FontAlign::center); + } + for (int i = 1; i <= count; i++) { + table[i][0] //index value + .format() + .font_color(tabulate::Color::white) // Set font color + .font_align(tabulate::FontAlign::center); + table[i][4] //version value + .format() + .font_align(tabulate::FontAlign::center); } + std::cout << table << std::endl; } -}; // namespace commands +} + +; // namespace commands From ad72ea40b808ad844ad004be5fe7c1f87a918d61 Mon Sep 17 00:00:00 2001 From: Thuandz Date: Fri, 20 Sep 2024 09:00:30 +0700 Subject: [PATCH 09/10] Fix comment --- engine/commands/model_import_cmd.h | 1 - 1 file changed, 1 deletion(-) diff --git a/engine/commands/model_import_cmd.h b/engine/commands/model_import_cmd.h index b1441a281..d4248281f 100644 --- a/engine/commands/model_import_cmd.h +++ b/engine/commands/model_import_cmd.h @@ -1,6 +1,5 @@ #pragma once -#include // For std::isnan #include namespace commands { From acf831fbc4619eb7de1bfe882e87e87487512aa5 Mon Sep 17 00:00:00 2001 From: Thuandz Date: Fri, 20 Sep 2024 10:44:51 +0700 Subject: [PATCH 10/10] Fix comment --- engine/commands/model_get_cmd.cc | 112 +---------------- engine/commands/model_import_cmd.cc | 3 +- engine/config/model_config.h | 188 ++++++++++++++++++++++++++++ engine/controllers/models.cc | 131 +------------------ 4 files changed, 194 insertions(+), 240 deletions(-) diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc index 39a903245..715728c1f 100644 --- a/engine/commands/model_get_cmd.cc +++ b/engine/commands/model_get_cmd.cc @@ -19,117 +19,7 @@ void ModelGetCmd::Exec(const std::string& model_handle) { yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); auto model_config = yaml_handler.GetModelConfig(); - // Helper function to print comments - auto print_comment = [](const std::string& comment) { - std::cout << "\033[1;90m# " << comment << "\033[0m\n"; - }; - - print_comment("BEGIN GENERAL GGUF METADATA"); - - // Helper function to print key-value pairs with color - auto print_kv = [](const std::string& key, const auto& value, - const std::string& color = "\033[0m") { - std::cout << "\033[1;32m" << key << ":\033[0m " << color << value - << "\033[0m\n"; - }; - - // Helper function to print boolean values - auto print_bool = [&print_kv](const std::string& key, bool value) { - print_kv(key, value ? "true" : "false", "\033[0;35m"); - }; - - // Print non-empty strings - if (!model_config.id.empty()) - print_kv("id", model_config.id, "\033[0;33m"); - if (!model_config.name.empty()) - print_kv("name", model_config.name, "\033[0;33m"); - if (!model_config.model.empty()) - print_kv("model", model_config.model, "\033[0;33m"); - if (!model_config.version.empty()) - print_kv("version", model_config.version, "\033[0;33m"); - - // Print non-empty vectors - if (!model_config.files.empty()) { - std::cout << "\033[1;32mfiles:\033[0m\n"; - for (const auto& file : model_config.files) { - std::cout << " - \033[0;33m" << file << "\033[0m\n"; - } - } - - print_comment("END GENERAL GGUF METADATA"); - print_comment("BEGIN INFERENCE PARAMETERS"); - print_comment("BEGIN REQUIRED"); - - if (!model_config.stop.empty()) { - std::cout << "\033[1;32mstop:\033[0m\n"; - for (const auto& stop : model_config.stop) { - std::cout << " - \033[0;33m" << stop << "\033[0m\n"; - } - } - - print_comment("END REQUIRED"); - print_comment("BEGIN OPTIONAL"); - - // Print boolean values - print_bool("stream", model_config.stream); - - // Print float values with fixed precision - auto print_float = [&print_kv](const std::string& key, float value) { - if (!std::isnan(value)) { - std::ostringstream oss; - oss << std::fixed << std::setprecision(9) << value; - print_kv(key, oss.str(), "\033[0;34m"); - } - }; - - print_float("top_p", model_config.top_p); - print_float("temperature", model_config.temperature); - print_float("frequency_penalty", model_config.frequency_penalty); - print_float("presence_penalty", model_config.presence_penalty); - - // Print integer values - auto print_int = [&print_kv](const std::string& key, int value) { - if (value != 0) { // Assuming 0 is the default/unset value - print_kv(key, value, "\033[0;35m"); - } - }; - - print_int("max_tokens", static_cast(model_config.max_tokens)); - print_int("seed", model_config.seed); - print_float("dynatemp_range", model_config.dynatemp_range); - print_float("dynatemp_exponent", model_config.dynatemp_exponent); - print_int("top_k", model_config.top_k); - print_float("min_p", model_config.min_p); - print_int("tfs_z", model_config.tfs_z); - print_float("typ_p", model_config.typ_p); - print_int("repeat_last_n", model_config.repeat_last_n); - print_float("repeat_penalty", model_config.repeat_penalty); - print_bool("mirostat", model_config.mirostat); - print_float("mirostat_tau", model_config.mirostat_tau); - print_float("mirostat_eta", model_config.mirostat_eta); - print_bool("penalize_nl", model_config.penalize_nl); - print_bool("ignore_eos", model_config.ignore_eos); - print_int("n_probs", model_config.n_probs); - print_int("min_keep", model_config.min_keep); - - print_comment("END OPTIONAL"); - print_comment("END INFERENCE PARAMETERS"); - print_comment("BEGIN MODEL LOAD PARAMETERS"); - print_comment("BEGIN REQUIRED"); - - if (!model_config.engine.empty()) - print_kv("engine", model_config.engine, "\033[0;33m"); - if (!model_config.prompt_template.empty()) - print_kv("prompt_template", model_config.prompt_template, "\033[0;33m"); - - print_comment("END REQUIRED"); - print_comment("BEGIN OPTIONAL"); - - print_int("ctx_len", static_cast(model_config.ctx_len)); - print_int("ngl", static_cast(model_config.ngl)); - - print_comment("END OPTIONAL"); - print_comment("END MODEL LOAD PARAMETERS"); + std::cout << model_config.ToString() << std::endl; } catch (const std::exception& e) { CLI_LOG("Fail to get model information with ID '" + model_handle + diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc index 830a1fdd7..193b2488b 100644 --- a/engine/commands/model_import_cmd.cc +++ b/engine/commands/model_import_cmd.cc @@ -45,7 +45,8 @@ void ModelImportCmd::Exec() { } } catch (const std::exception& e) { - std::remove(model_yaml_path.c_str()); + // don't need to remove yml file here, because it's written only if model entry is successfully added, + // remove file here can make it fail with edge case when user try to import new model with existed model_id CLI_LOG("Error importing model path '" + model_path_ + "' with model_id '" + model_handle_ + "': " + e.what()); } diff --git a/engine/config/model_config.h b/engine/config/model_config.h index f61f9e9ba..74410db52 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -1,6 +1,10 @@ #pragma once +#include +#include +#include #include +#include #include #include @@ -54,5 +58,189 @@ struct ModelConfig { int n_probs = 0; int min_keep = 0; std::string grammar; + + Json::Value ToJson() const { + Json::Value obj; + + obj["id"] = id; + obj["name"] = name; + obj["model"] = model; + obj["version"] = version; + + Json::Value stop_array(Json::arrayValue); + for (const auto& s : stop) { + stop_array.append(s); + } + obj["stop"] = stop_array; + + obj["stream"] = stream; + obj["top_p"] = top_p; + obj["temperature"] = temperature; + obj["frequency_penalty"] = frequency_penalty; + obj["presence_penalty"] = presence_penalty; + obj["max_tokens"] = max_tokens; + obj["seed"] = seed; + obj["dynatemp_range"] = dynatemp_range; + obj["dynatemp_exponent"] = dynatemp_exponent; + obj["top_k"] = top_k; + obj["min_p"] = min_p; + obj["tfs_z"] = tfs_z; + obj["typ_p"] = typ_p; + obj["repeat_last_n"] = repeat_last_n; + obj["repeat_penalty"] = repeat_penalty; + obj["mirostat"] = mirostat; + obj["mirostat_tau"] = mirostat_tau; + obj["mirostat_eta"] = mirostat_eta; + obj["penalize_nl"] = penalize_nl; + obj["ignore_eos"] = ignore_eos; + obj["n_probs"] = n_probs; + obj["min_keep"] = min_keep; + obj["ngl"] = ngl; + obj["ctx_len"] = ctx_len; + obj["engine"] = engine; + obj["prompt_template"] = prompt_template; + obj["system_template"] = system_template; + obj["user_template"] = user_template; + obj["ai_template"] = ai_template; + obj["os"] = os; + obj["gpu_arch"] = gpu_arch; + obj["quantization_method"] = quantization_method; + obj["precision"] = precision; + + Json::Value files_array(Json::arrayValue); + for (const auto& file : files) { + files_array.append(file); + } + obj["files"] = files_array; + + obj["created"] = static_cast(created); + obj["object"] = object; + obj["owned_by"] = owned_by; + obj["text_model"] = text_model; + + if (engine == "cortex.tensorrt-llm") { + obj["trtllm_version"] = trtllm_version; + obj["tp"] = tp; + } + + return obj; + } + std::string ToString() const { + std::ostringstream oss; + + // Color codes + const std::string RESET = "\033[0m"; + const std::string BOLD = "\033[1m"; + const std::string GREEN = "\033[1;32m"; + const std::string YELLOW = "\033[0;33m"; + const std::string BLUE = "\033[0;34m"; + const std::string MAGENTA = "\033[0;35m"; + const std::string GRAY = "\033[1;90m"; + + // Helper function to print comments + auto print_comment = [&oss, &GRAY, &RESET](const std::string& comment) { + oss << GRAY << "# " << comment << RESET << "\n"; + }; + + // Helper function to print key-value pairs + auto print_kv = [&oss, &GREEN, &RESET]( + const std::string& key, const auto& value, + const std::string& color = "\033[0m") { + oss << GREEN << key << ":" << RESET << " " << color << value << RESET + << "\n"; + }; + + // Helper function to print boolean values + auto print_bool = [&print_kv, &MAGENTA](const std::string& key, + bool value) { + print_kv(key, value ? "true" : "false", MAGENTA); + }; + + // Helper function to print float values with fixed precision + auto print_float = [&print_kv, &BLUE](const std::string& key, float value) { + if (!std::isnan(value)) { + std::ostringstream float_oss; + float_oss << std::fixed << std::setprecision(9) << value; + print_kv(key, float_oss.str(), BLUE); + } + }; + + print_comment("BEGIN GENERAL GGUF METADATA"); + if (!id.empty()) + print_kv("id", id, YELLOW); + if (!name.empty()) + print_kv("name", name, YELLOW); + if (!model.empty()) + print_kv("model", model, YELLOW); + if (!version.empty()) + print_kv("version", version, YELLOW); + if (!files.empty()) { + oss << GREEN << "files:" << RESET << "\n"; + for (const auto& file : files) { + oss << " - " << YELLOW << file << RESET << "\n"; + } + } + print_comment("END GENERAL GGUF METADATA"); + + print_comment("BEGIN INFERENCE PARAMETERS"); + print_comment("BEGIN REQUIRED"); + if (!stop.empty()) { + oss << GREEN << "stop:" << RESET << "\n"; + for (const auto& s : stop) { + oss << " - " << YELLOW << s << RESET << "\n"; + } + } + print_comment("END REQUIRED"); + print_comment("BEGIN OPTIONAL"); + + print_bool("stream", stream); + print_float("top_p", top_p); + print_float("temperature", temperature); + print_float("frequency_penalty", frequency_penalty); + print_float("presence_penalty", presence_penalty); + if (max_tokens != std::numeric_limits::quiet_NaN()) + print_kv("max_tokens", max_tokens, MAGENTA); + if (seed != -1) + print_kv("seed", seed, MAGENTA); + print_float("dynatemp_range", dynatemp_range); + print_float("dynatemp_exponent", dynatemp_exponent); + print_kv("top_k", top_k, MAGENTA); + print_float("min_p", min_p); + print_kv("tfs_z", tfs_z, MAGENTA); + print_float("typ_p", typ_p); + print_kv("repeat_last_n", repeat_last_n, MAGENTA); + print_float("repeat_penalty", repeat_penalty); + print_bool("mirostat", mirostat); + print_float("mirostat_tau", mirostat_tau); + print_float("mirostat_eta", mirostat_eta); + print_bool("penalize_nl", penalize_nl); + print_bool("ignore_eos", ignore_eos); + print_kv("n_probs", n_probs, MAGENTA); + print_kv("min_keep", min_keep, MAGENTA); + + print_comment("END OPTIONAL"); + print_comment("END INFERENCE PARAMETERS"); + print_comment("BEGIN MODEL LOAD PARAMETERS"); + print_comment("BEGIN REQUIRED"); + + if (!engine.empty()) + print_kv("engine", engine, YELLOW); + if (!prompt_template.empty()) + print_kv("prompt_template", prompt_template, YELLOW); + + print_comment("END REQUIRED"); + print_comment("BEGIN OPTIONAL"); + + if (ctx_len != std::numeric_limits::quiet_NaN()) + print_kv("ctx_len", ctx_len, MAGENTA); + if (ngl != std::numeric_limits::quiet_NaN()) + print_kv("ngl", ngl, MAGENTA); + + print_comment("END OPTIONAL"); + print_comment("END MODEL LOAD PARAMETERS"); + + return oss.str(); + } }; + } // namespace config diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 8eff6a130..e857d89da 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -67,69 +67,7 @@ void Models::ListModel( yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); auto model_config = yaml_handler.GetModelConfig(); - Json::Value obj; - obj["id"] = model_config.id; - obj["name"] = model_config.name; - obj["model"] = model_config.model; - obj["version"] = model_config.version; - - Json::Value stop_array(Json::arrayValue); - for (const std::string& stop : model_config.stop) - stop_array.append(stop); - obj["stop"] = stop_array; - - obj["stream"] = model_config.stream; - obj["top_p"] = model_config.top_p; - obj["temperature"] = model_config.temperature; - obj["frequency_penalty"] = model_config.frequency_penalty; - obj["presence_penalty"] = model_config.presence_penalty; - obj["max_tokens"] = static_cast(model_config.max_tokens); - - // New fields - obj["seed"] = model_config.seed; - obj["dynatemp_range"] = model_config.dynatemp_range; - obj["dynatemp_exponent"] = model_config.dynatemp_exponent; - obj["top_k"] = model_config.top_k; - obj["min_p"] = model_config.min_p; - obj["tfs_z"] = model_config.tfs_z; - obj["typ_p"] = model_config.typ_p; - obj["repeat_last_n"] = model_config.repeat_last_n; - obj["repeat_penalty"] = model_config.repeat_penalty; - obj["mirostat"] = model_config.mirostat; - obj["mirostat_tau"] = model_config.mirostat_tau; - obj["mirostat_eta"] = model_config.mirostat_eta; - obj["penalize_nl"] = model_config.penalize_nl; - obj["ignore_eos"] = model_config.ignore_eos; - obj["n_probs"] = model_config.n_probs; - obj["min_keep"] = model_config.min_keep; - - obj["ngl"] = model_config.ngl; - obj["ctx_len"] = static_cast(model_config.ctx_len); - obj["engine"] = model_config.engine; - obj["prompt_template"] = model_config.prompt_template; - obj["system_template"] = model_config.system_template; - obj["user_template"] = model_config.user_template; - obj["ai_template"] = model_config.ai_template; - - obj["os"] = model_config.os; - obj["gpu_arch"] = model_config.gpu_arch; - obj["quantization_method"] = model_config.quantization_method; - obj["precision"] = model_config.precision; - - Json::Value files_array(Json::arrayValue); - for (const std::string& file : model_config.files) - files_array.append(file); - obj["files"] = files_array; - - obj["created"] = static_cast(model_config.created); - obj["object"] = model_config.object; - obj["owned_by"] = model_config.owned_by; - obj["text_model"] = model_config.text_model; - - if (model_config.engine == "cortex.tensorrt-llm") { - obj["trtllm_version"] = model_config.trtllm_version; - obj["tp"] = model_config.tp; - } + Json::Value obj = model_config.ToJson(); data.append(std::move(obj)); yaml_handler.Reset(); @@ -175,70 +113,8 @@ void Models::GetModel( yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml); auto model_config = yaml_handler.GetModelConfig(); - Json::Value obj; - obj["id"] = model_config.id; - obj["name"] = model_config.name; - obj["model"] = model_config.model; - obj["version"] = model_config.version; - - Json::Value stop_array(Json::arrayValue); - for (const std::string& stop : model_config.stop) - stop_array.append(stop); - obj["stop"] = stop_array; - - obj["stream"] = model_config.stream; - obj["top_p"] = model_config.top_p; - obj["temperature"] = model_config.temperature; - obj["frequency_penalty"] = model_config.frequency_penalty; - obj["presence_penalty"] = model_config.presence_penalty; - obj["max_tokens"] = static_cast(model_config.max_tokens); - - // New fields - obj["seed"] = model_config.seed; - obj["dynatemp_range"] = model_config.dynatemp_range; - obj["dynatemp_exponent"] = model_config.dynatemp_exponent; - obj["top_k"] = model_config.top_k; - obj["min_p"] = model_config.min_p; - obj["tfs_z"] = model_config.tfs_z; - obj["typ_p"] = model_config.typ_p; - obj["repeat_last_n"] = model_config.repeat_last_n; - obj["repeat_penalty"] = model_config.repeat_penalty; - obj["mirostat"] = model_config.mirostat; - obj["mirostat_tau"] = model_config.mirostat_tau; - obj["mirostat_eta"] = model_config.mirostat_eta; - obj["penalize_nl"] = model_config.penalize_nl; - obj["ignore_eos"] = model_config.ignore_eos; - obj["n_probs"] = model_config.n_probs; - obj["min_keep"] = model_config.min_keep; - - obj["ngl"] = model_config.ngl; - obj["ctx_len"] = static_cast(model_config.ctx_len); - obj["engine"] = model_config.engine; - obj["prompt_template"] = model_config.prompt_template; - obj["system_template"] = model_config.system_template; - obj["user_template"] = model_config.user_template; - obj["ai_template"] = model_config.ai_template; - - obj["os"] = model_config.os; - obj["gpu_arch"] = model_config.gpu_arch; - obj["quantization_method"] = model_config.quantization_method; - obj["precision"] = model_config.precision; - - Json::Value files_array(Json::arrayValue); - for (const std::string& file : model_config.files) - files_array.append(file); - obj["files"] = files_array; - - obj["created"] = static_cast(model_config.created); - obj["object"] = model_config.object; - obj["owned_by"] = model_config.owned_by; - obj["text_model"] = model_config.text_model; - - if (model_config.engine == "cortex.tensorrt-llm") { - obj["trtllm_version"] = model_config.trtllm_version; - obj["tp"] = model_config.tp; - } - + Json::Value obj = model_config.ToJson(); + data.append(std::move(obj)); ret["data"] = data; ret["result"] = "OK"; @@ -336,7 +212,6 @@ void Models::ImportModel( } } catch (const std::exception& e) { - std::remove(model_yaml_path.c_str()); std::string error_message = "Error importing model path '" + modelPath + "' with model_id '" + modelHandle + "': " + e.what();