diff --git a/engine/commands/model_get_cmd.cc b/engine/commands/model_get_cmd.cc
index d4f19ffa8..715728c1f 100644
--- a/engine/commands/model_get_cmd.cc
+++ b/engine/commands/model_get_cmd.cc
@@ -1,147 +1,30 @@
 #include "model_get_cmd.h"
 #include <cmath>
+#include <iostream>
 #include <vector>
 #include "cmd_info.h"
 #include "config/yaml_config.h"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/modellist_utils.h"
 
 namespace commands {
 
 void ModelGetCmd::Exec(const std::string& model_handle) {
-  auto models_path = file_manager_utils::GetModelsContainerPath();
-  if (std::filesystem::exists(models_path) &&
-      std::filesystem::is_directory(models_path)) {
-    CmdInfo ci(model_handle);
-    std::string model_file =
-        ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch;
-    bool found_model = false;
-    // Iterate through directory
-    for (const auto& entry : std::filesystem::directory_iterator(models_path)) {
-
-      if (entry.is_regular_file() && entry.path().stem() == model_file &&
-          entry.path().extension() == ".yaml") {
-        try {
-          config::YamlHandler handler;
-          handler.ModelConfigFromFile(entry.path().string());
-          const auto& model_config = handler.GetModelConfig();
-          std::cout << "ModelConfig Details:\n";
-          std::cout << "-------------------\n";
-
-          // Print non-null strings
-          if (!model_config.id.empty())
-            std::cout << "id: " << model_config.id << "\n";
-          if (!model_config.name.empty())
-            std::cout << "name: " << model_config.name << "\n";
-          if (!model_config.model.empty())
-            std::cout << "model: " << model_config.model << "\n";
-          if (!model_config.version.empty())
-            std::cout << "version: " << model_config.version << "\n";
-
-          // Print non-empty vectors
-          if (!model_config.stop.empty()) {
-            std::cout << "stop: [";
-            for (size_t i = 0; i < model_config.stop.size(); ++i) {
-              std::cout << model_config.stop[i];
-              if (i < model_config.stop.size() - 1)
-                std::cout << ", ";
-            }
-            std::cout << "]\n";
-          }
-          // Print valid numbers
-          if (!std::isnan(static_cast<float>(model_config.top_p)))
-            std::cout << "top_p: " << model_config.top_p << "\n";
-          if (!std::isnan(static_cast<float>(model_config.temperature)))
-            std::cout << "temperature: " << model_config.temperature << "\n";
-          if (!std::isnan(static_cast<float>(model_config.frequency_penalty)))
-            std::cout << "frequency_penalty: " << model_config.frequency_penalty
-                      << "\n";
-          if (!std::isnan(static_cast<float>(model_config.presence_penalty)))
-            std::cout << "presence_penalty: " << model_config.presence_penalty
-                      << "\n";
-          if (!std::isnan(static_cast<float>(model_config.max_tokens)))
-            std::cout << "max_tokens: " << model_config.max_tokens << "\n";
-          if (!std::isnan(static_cast<float>(model_config.stream)))
-
-            std::cout << "stream: " << std::boolalpha << model_config.stream
-                      << "\n";
-          if (!std::isnan(static_cast<float>(model_config.ngl)))
-            std::cout << "ngl: " << model_config.ngl << "\n";
-          if (!std::isnan(static_cast<float>(model_config.ctx_len)))
-            std::cout << "ctx_len: " << model_config.ctx_len << "\n";
-
-          // Print non-null strings
-          if (!model_config.engine.empty())
-            std::cout << "engine: " << model_config.engine << "\n";
-          if (!model_config.prompt_template.empty())
-
-            std::cout << "prompt_template: " << model_config.prompt_template
-                      << "\n";
-          if (!model_config.system_template.empty())
-            std::cout << "system_template: " << model_config.system_template
-                      << "\n";
-          if (!model_config.user_template.empty())
-            std::cout << "user_template: " << model_config.user_template
-                      << "\n";
-          if (!model_config.ai_template.empty())
-            std::cout << "ai_template: " << model_config.ai_template << "\n";
-          if (!model_config.os.empty())
-            std::cout << "os: " << model_config.os << "\n";
-          if (!model_config.gpu_arch.empty())
-            std::cout << "gpu_arch: " << model_config.gpu_arch << "\n";
-          if (!model_config.quantization_method.empty())
-
-            std::cout << "quantization_method: "
-                      << model_config.quantization_method << "\n";
-          if (!model_config.precision.empty())
-            std::cout << "precision: " << model_config.precision << "\n";
-
-          if (!std::isnan(static_cast<float>(model_config.tp)))
-            std::cout << "tp: " << model_config.tp << "\n";
-
-          // Print non-null strings
-          if (!model_config.trtllm_version.empty())
-
-            std::cout << "trtllm_version: " << model_config.trtllm_version
-                      << "\n";
-          if (!std::isnan(static_cast<float>(model_config.text_model)))
-            std::cout << "text_model: " << std::boolalpha
-                      << model_config.text_model << "\n";
-
-          // Print non-empty vectors
-          if (!model_config.files.empty()) {
-            std::cout << "files: [";
-            for (size_t i = 0; i < model_config.files.size(); ++i) {
-              std::cout << model_config.files[i];
-              if (i < model_config.files.size() - 1)
-                std::cout << ", ";
-            }
-            std::cout << "]\n";
-          }
-
-          // Print valid size_t number
-          if (model_config.created != 0)
-            std::cout << "created: " << model_config.created << "\n";
-
-          if (!model_config.object.empty())
-            std::cout << "object: " << model_config.object << "\n";
-          if (!model_config.owned_by.empty())
-            std::cout << "owned_by: " << model_config.owned_by << "\n";
-
-          found_model = true;
-          break;
-        } catch (const std::exception& e) {
-          CTL_ERR("Error reading yaml file '" << entry.path().string()
-                  << "': " << e.what());
-        }
-      }
-    }
-    if (!found_model) {
-      CLI_LOG("Model not found!");
-    }
-  } else {
-    CLI_LOG("Model not found!");
+  modellist_utils::ModelListUtils modellist_handler;
+  config::YamlHandler yaml_handler;
+  try {
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml);
+    auto model_config = yaml_handler.GetModelConfig();
+
+    std::cout << model_config.ToString() << std::endl;
+
+  } catch (const std::exception& e) {
+    CLI_LOG("Fail to get model information with ID '" + model_handle +
+            "': " + e.what());
   }
 }
-};  // namespace commands
+
+}  // namespace commands
\ No newline at end of file
diff --git a/engine/commands/model_import_cmd.cc b/engine/commands/model_import_cmd.cc
index 830a1fdd7..193b2488b 100644
--- a/engine/commands/model_import_cmd.cc
+++ b/engine/commands/model_import_cmd.cc
@@ -45,7 +45,8 @@ void ModelImportCmd::Exec() {
     }
 
   } catch (const std::exception& e) {
-    std::remove(model_yaml_path.c_str());
+    // No need to remove the yml file here: it is only written after the model entry has been added successfully.
+    // Removing it here can make the import fail in the edge case where the user imports a new model with an existing model_id.
     CLI_LOG("Error importing model path '" + model_path_ + "' with model_id '" +
             model_handle_ + "': " + e.what());
   }
diff --git a/engine/commands/model_list_cmd.cc b/engine/commands/model_list_cmd.cc
index e0ca88bd3..6e3990eb6 100644
--- a/engine/commands/model_list_cmd.cc
+++ b/engine/commands/model_list_cmd.cc
@@ -6,51 +6,59 @@
 #include "config/yaml_config.h"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/modellist_utils.h"
 
 namespace commands {
 
 void ModelListCmd::Exec() {
   auto models_path = file_manager_utils::GetModelsContainerPath();
-  if (std::filesystem::exists(models_path) &&
-      std::filesystem::is_directory(models_path)) {
-    tabulate::Table table;
+  modellist_utils::ModelListUtils modellist_handler;
+  config::YamlHandler yaml_handler;
+  tabulate::Table table;
 
-    table.add_row({"(Index)", "ID", "engine", "version"});
-    table.format().font_color(tabulate::Color::green);
-    int count = 0;
-    // Iterate through directory
-    for (const auto& entry : std::filesystem::directory_iterator(models_path)) {
-      if (entry.is_regular_file() && entry.path().extension() == ".yaml") {
-        try {
-          count += 1;
-          config::YamlHandler handler;
-          handler.ModelConfigFromFile(entry.path().string());
-          const auto& model_config = handler.GetModelConfig();
-          table.add_row({std::to_string(count), model_config.id,
-                         model_config.engine, model_config.version});
-        } catch (const std::exception& e) {
-          CTL_ERR("Error reading yaml file '" << entry.path().string()
-                  << "': " << e.what());
-        }
+  table.add_row({"(Index)", "ID", "model alias", "engine", "version"});
+  table.format().font_color(tabulate::Color::green);
+  int count = 0;
+  // Iterate through directory
+
+  try {
+    auto list_entry = modellist_handler.LoadModelList();
+    for (const auto& model_entry : list_entry) {
+      // auto model_entry = modellist_handler.GetModelInfo(model_handle);
+      try {
+        count += 1;
+        yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml);
+        auto model_config = yaml_handler.GetModelConfig();
+        table.add_row({std::to_string(count), model_entry.model_id,
+                       model_entry.model_alias, model_config.engine,
+                       model_config.version});
+        yaml_handler.Reset();
+      } catch (const std::exception& e) {
+        CTL_ERR("Fail to get list model information: " + std::string(e.what()));
       }
     }
-    for (int i = 0; i < 4; i++) {
-      table[0][i]
-          .format()
-          .font_color(tabulate::Color::white)  // Set font color
-          .font_style({tabulate::FontStyle::bold})
-          .font_align(tabulate::FontAlign::center);
-    }
-    for (int i = 1; i <= count; i++) {
-      table[i][0]  //index value
-          .format()
-          .font_color(tabulate::Color::white)  // Set font color
-          .font_align(tabulate::FontAlign::center);
-      table[i][3]  //version value
-          .format()
-          .font_align(tabulate::FontAlign::center);
-    }
-    std::cout << table << std::endl;
+  } catch (const std::exception& e) {
+    CTL_ERR("Fail to get list model information: " + std::string(e.what()));
+  }
+
+  for (int i = 0; i < 5; i++) {
+    table[0][i]
+        .format()
+        .font_color(tabulate::Color::white)  // Set font color
+        .font_style({tabulate::FontStyle::bold})
+        .font_align(tabulate::FontAlign::center);
+  }
+  for (int i = 1; i <= count; i++) {
+    table[i][0]  //index value
+        .format()
+        .font_color(tabulate::Color::white)  // Set font color
+        .font_align(tabulate::FontAlign::center);
+    table[i][4]  //version value
+        .format()
+        .font_align(tabulate::FontAlign::center);
   }
+  std::cout << table << std::endl;
 }
-};  // namespace commands
+}
+
+;  // namespace commands
diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index f61f9e9ba..74410db52 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -1,6 +1,10 @@
 #pragma once
 
+#include <cmath>
+#include <iomanip>
+#include <json/json.h>
 #include <limits>
+#include <sstream>
 #include <string>
 #include <vector>
 
@@ -54,5 +58,189 @@ struct ModelConfig {
   int n_probs = 0;
   int min_keep = 0;
   std::string grammar;
+
+  Json::Value ToJson() const {
+    Json::Value obj;
+
+    obj["id"] = id;
+    obj["name"] = name;
+    obj["model"] = model;
+    obj["version"] = version;
+
+    Json::Value stop_array(Json::arrayValue);
+    for (const auto& s : stop) {
+      stop_array.append(s);
+    }
+    obj["stop"] = stop_array;
+
+    obj["stream"] = stream;
+    obj["top_p"] = top_p;
+    obj["temperature"] = temperature;
+    obj["frequency_penalty"] = frequency_penalty;
+    obj["presence_penalty"] = presence_penalty;
+    obj["max_tokens"] = max_tokens;
+    obj["seed"] = seed;
+    obj["dynatemp_range"] = dynatemp_range;
+    obj["dynatemp_exponent"] = dynatemp_exponent;
+    obj["top_k"] = top_k;
+    obj["min_p"] = min_p;
+    obj["tfs_z"] = tfs_z;
+    obj["typ_p"] = typ_p;
+    obj["repeat_last_n"] = repeat_last_n;
+    obj["repeat_penalty"] = repeat_penalty;
+    obj["mirostat"] = mirostat;
+    obj["mirostat_tau"] = mirostat_tau;
+    obj["mirostat_eta"] = mirostat_eta;
+    obj["penalize_nl"] = penalize_nl;
+    obj["ignore_eos"] = ignore_eos;
+    obj["n_probs"] = n_probs;
+    obj["min_keep"] = min_keep;
+    obj["ngl"] = ngl;
+    obj["ctx_len"] = ctx_len;
+    obj["engine"] = engine;
+    obj["prompt_template"] = prompt_template;
+    obj["system_template"] = system_template;
+    obj["user_template"] = user_template;
+    obj["ai_template"] = ai_template;
+    obj["os"] = os;
+    obj["gpu_arch"] = gpu_arch;
+    obj["quantization_method"] = quantization_method;
+    obj["precision"] = precision;
+
+    Json::Value files_array(Json::arrayValue);
+    for (const auto& file : files) {
+      files_array.append(file);
+    }
+    obj["files"] = files_array;
+
+    obj["created"] = static_cast<uint32_t>(created);
+    obj["object"] = object;
+    obj["owned_by"] = owned_by;
+    obj["text_model"] = text_model;
+
+    if (engine == "cortex.tensorrt-llm") {
+      obj["trtllm_version"] = trtllm_version;
+      obj["tp"] = tp;
+    }
+
+    return obj;
+  }
+  std::string ToString() const {
+    std::ostringstream oss;
+
+    // Color codes
+    const std::string RESET = "\033[0m";
+    const std::string BOLD = "\033[1m";
+    const std::string GREEN = "\033[1;32m";
+    const std::string YELLOW = "\033[0;33m";
+    const std::string BLUE = "\033[0;34m";
+    const std::string MAGENTA = "\033[0;35m";
+    const std::string GRAY = "\033[1;90m";
+
+    // Helper function to print comments
+    auto print_comment = [&oss, &GRAY, &RESET](const std::string& comment) {
+      oss << GRAY << "# " << comment << RESET << "\n";
+    };
+
+    // Helper function to print key-value pairs
+    auto print_kv = [&oss, &GREEN, &RESET](
+                        const std::string& key, const auto& value,
+                        const std::string& color = "\033[0m") {
+      oss << GREEN << key << ":" << RESET << " " << color << value << RESET
+          << "\n";
+    };
+
+    // Helper function to print boolean values
+    auto print_bool = [&print_kv, &MAGENTA](const std::string& key,
+                                            bool value) {
+      print_kv(key, value ? "true" : "false", MAGENTA);
+    };
+
+    // Helper function to print float values with fixed precision
+    auto print_float = [&print_kv, &BLUE](const std::string& key, float value) {
+      if (!std::isnan(value)) {
+        std::ostringstream float_oss;
+        float_oss << std::fixed << std::setprecision(9) << value;
+        print_kv(key, float_oss.str(), BLUE);
+      }
+    };
+
+    print_comment("BEGIN GENERAL GGUF METADATA");
+    if (!id.empty())
+      print_kv("id", id, YELLOW);
+    if (!name.empty())
+      print_kv("name", name, YELLOW);
+    if (!model.empty())
+      print_kv("model", model, YELLOW);
+    if (!version.empty())
+      print_kv("version", version, YELLOW);
+    if (!files.empty()) {
+      oss << GREEN << "files:" << RESET << "\n";
+      for (const auto& file : files) {
+        oss << " - " << YELLOW << file << RESET << "\n";
+      }
+    }
+    print_comment("END GENERAL GGUF METADATA");
+
+    print_comment("BEGIN INFERENCE PARAMETERS");
+    print_comment("BEGIN REQUIRED");
+    if (!stop.empty()) {
+      oss << GREEN << "stop:" << RESET << "\n";
+      for (const auto& s : stop) {
+        oss << " - " << YELLOW << s << RESET << "\n";
+      }
+    }
+    print_comment("END REQUIRED");
+    print_comment("BEGIN OPTIONAL");
+
+    print_bool("stream", stream);
+    print_float("top_p", top_p);
+    print_float("temperature", temperature);
+    print_float("frequency_penalty", frequency_penalty);
+    print_float("presence_penalty", presence_penalty);
+    if (max_tokens != std::numeric_limits<int>::quiet_NaN())
+      print_kv("max_tokens", max_tokens, MAGENTA);
+    if (seed != -1)
+      print_kv("seed", seed, MAGENTA);
+    print_float("dynatemp_range", dynatemp_range);
+    print_float("dynatemp_exponent", dynatemp_exponent);
+    print_kv("top_k", top_k, MAGENTA);
+    print_float("min_p", min_p);
+    print_kv("tfs_z", tfs_z, MAGENTA);
+    print_float("typ_p", typ_p);
+    print_kv("repeat_last_n", repeat_last_n, MAGENTA);
+    print_float("repeat_penalty", repeat_penalty);
+    print_bool("mirostat", mirostat);
+    print_float("mirostat_tau", mirostat_tau);
+    print_float("mirostat_eta", mirostat_eta);
+    print_bool("penalize_nl", penalize_nl);
+    print_bool("ignore_eos", ignore_eos);
+    print_kv("n_probs", n_probs, MAGENTA);
+    print_kv("min_keep", min_keep, MAGENTA);
+
+    print_comment("END OPTIONAL");
+    print_comment("END INFERENCE PARAMETERS");
+    print_comment("BEGIN MODEL LOAD PARAMETERS");
+    print_comment("BEGIN REQUIRED");
+
+    if (!engine.empty())
+      print_kv("engine", engine, YELLOW);
+    if (!prompt_template.empty())
+      print_kv("prompt_template", prompt_template, YELLOW);
+
+    print_comment("END REQUIRED");
+    print_comment("BEGIN OPTIONAL");
+
+    if (ctx_len != std::numeric_limits<int>::quiet_NaN())
+      print_kv("ctx_len", ctx_len, MAGENTA);
+    if (ngl != std::numeric_limits<int>::quiet_NaN())
+      print_kv("ngl", ngl, MAGENTA);
+
+    print_comment("END OPTIONAL");
+    print_comment("END MODEL LOAD PARAMETERS");
+
+    return oss.str();
+  }
 };
+
 }  // namespace config
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 2d2434d6d..e857d89da 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -52,58 +52,46 @@ void Models::ListModel(
   Json::Value ret;
   ret["object"] = "list";
   Json::Value data(Json::arrayValue);
-  auto models_path = file_manager_utils::GetModelsContainerPath();
-  if (std::filesystem::exists(models_path) &&
-      std::filesystem::is_directory(models_path)) {
-    // Iterate through directory
-    for (const auto& entry : std::filesystem::directory_iterator(models_path)) {
-      if (entry.is_regular_file() && entry.path().extension() == ".yaml") {
-        try {
-          config::YamlHandler handler;
-          handler.ModelConfigFromFile(entry.path().string());
-          auto const& model_config = handler.GetModelConfig();
-          Json::Value obj;
-          obj["name"] = model_config.name;
-          obj["model"] = model_config.model;
-          obj["version"] = model_config.version;
-          Json::Value stop_array(Json::arrayValue);
-          for (const std::string& stop : model_config.stop)
-            stop_array.append(stop);
-          obj["stop"] = stop_array;
-          obj["top_p"] = model_config.top_p;
-          obj["temperature"] = model_config.temperature;
-          obj["presence_penalty"] = model_config.presence_penalty;
-          obj["max_tokens"] = model_config.max_tokens;
-          obj["stream"] = model_config.stream;
-          obj["ngl"] = model_config.ngl;
-          obj["ctx_len"] = model_config.ctx_len;
-          obj["engine"] = model_config.engine;
-          obj["prompt_template"] = model_config.prompt_template;
-          Json::Value files_array(Json::arrayValue);
-          for (const std::string& file : model_config.files)
-            files_array.append(file);
-          obj["files"] = files_array;
-          obj["id"] = model_config.id;
-          obj["created"] = static_cast<uint32_t>(model_config.created);
-          obj["object"] = model_config.object;
-          obj["owned_by"] = model_config.owned_by;
-          if (model_config.engine == "cortex.tensorrt-llm") {
-            obj["trtllm_version"] = model_config.trtllm_version;
-          }
-          data.append(std::move(obj));
-        } catch (const std::exception& e) {
-          LOG_ERROR << "Error reading yaml file '" << entry.path().string()
-                    << "': " << e.what();
-        }
+  // Iterate through directory
+
+  try {
+    modellist_utils::ModelListUtils modellist_handler;
+    config::YamlHandler yaml_handler;
+
+    auto list_entry = modellist_handler.LoadModelList();
+
+    for (const auto& model_entry : list_entry) {
+      // auto model_entry = modellist_handler.GetModelInfo(model_handle);
+      try {
+
+        yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml);
+        auto model_config = yaml_handler.GetModelConfig();
+        Json::Value obj = model_config.ToJson();
+
+        data.append(std::move(obj));
+        yaml_handler.Reset();
+      } catch (const std::exception& e) {
+        LOG_ERROR << "Failed to load yaml file for model: "
+                  << model_entry.path_to_model_yaml << ", error: " << e.what();
       }
     }
+    ret["data"] = data;
+    ret["result"] = "OK";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  } catch (const std::exception& e) {
+    std::string message =
+        "Fail to get list model information: " + std::string(e.what());
+    LOG_ERROR << message;
+    ret["data"] = data;
+    ret["result"] = "Fail to get list model information";
+    ret["message"] = message;
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
   }
-  ret["data"] = data;
-  ret["result"] = "OK";
-  auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
-  resp->setStatusCode(k200OK);
-  callback(resp);
 }
 
 void Models::GetModel(
@@ -117,59 +105,33 @@
   Json::Value ret;
   ret["object"] = "list";
   Json::Value data(Json::arrayValue);
-  if (std::filesystem::exists(cortex_utils::models_folder) &&
-      std::filesystem::is_directory(cortex_utils::models_folder)) {
-    // Iterate through directory
-    for (const auto& entry :
-         std::filesystem::directory_iterator(cortex_utils::models_folder)) {
-      if (entry.is_regular_file() && entry.path().extension() == ".yaml" &&
-          entry.path().stem() == model_handle) {
-        try {
-          config::YamlHandler handler;
-          handler.ModelConfigFromFile(entry.path().string());
-          auto const& model_config = handler.GetModelConfig();
-          Json::Value obj;
-          obj["name"] = model_config.name;
-          obj["model"] = model_config.model;
-          obj["version"] = model_config.version;
-          Json::Value stop_array(Json::arrayValue);
-          for (const std::string& stop : model_config.stop)
-            stop_array.append(stop);
-          obj["stop"] = stop_array;
-          obj["top_p"] = model_config.top_p;
-          obj["temperature"] = model_config.temperature;
-          obj["presence_penalty"] = model_config.presence_penalty;
-          obj["max_tokens"] = model_config.max_tokens;
-          obj["stream"] = model_config.stream;
-          obj["ngl"] = model_config.ngl;
-          obj["ctx_len"] = model_config.ctx_len;
-          obj["engine"] = model_config.engine;
-          obj["prompt_template"] = model_config.prompt_template;
-          Json::Value files_array(Json::arrayValue);
-          for (const std::string& file : model_config.files)
-            files_array.append(file);
-          obj["files"] = files_array;
-          obj["id"] = model_config.id;
-          obj["created"] = static_cast<uint32_t>(model_config.created);
-          obj["object"] = model_config.object;
-          obj["owned_by"] = model_config.owned_by;
-          if (model_config.engine == "cortex.tensorrt-llm") {
-            obj["trtllm_version"] = model_config.trtllm_version;
-          }
-          data.append(std::move(obj));
-        } catch (const std::exception& e) {
-          LOG_ERROR << "Error reading yaml file '" << entry.path().string()
-                    << "': " << e.what();
-        }
-      }
-    }
+  try {
+    modellist_utils::ModelListUtils modellist_handler;
+    config::YamlHandler yaml_handler;
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    yaml_handler.ModelConfigFromFile(model_entry.path_to_model_yaml);
+    auto model_config = yaml_handler.GetModelConfig();
+
+    Json::Value obj = model_config.ToJson();
+
+    data.append(std::move(obj));
+    ret["data"] = data;
+    ret["result"] = "OK";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  } catch (const std::exception& e) {
+    std::string message = "Fail to get model information with ID '" +
+                          model_handle + "': " + e.what();
+    LOG_ERROR << message;
+    ret["data"] = data;
+    ret["result"] = "Fail to get model information";
+    ret["message"] = message;
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
   }
-  ret["data"] = data;
-  ret["result"] = "OK";
-  auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
-  resp->setStatusCode(k200OK);
-  callback(resp);
 }
 
 void Models::DeleteModel(const HttpRequestPtr& req,
@@ -250,7 +212,6 @@ void Models::ImportModel(
     }
 
   } catch (const std::exception& e) {
-    std::remove(model_yaml_path.c_str());
     std::string error_message = "Error importing model path '" + modelPath +
                                 "' with model_id '" + modelHandle +
                                 "': " + e.what();
diff --git a/engine/utils/modellist_utils.h b/engine/utils/modellist_utils.h
index 7625c264b..75a41d880 100644
--- a/engine/utils/modellist_utils.h
+++ b/engine/utils/modellist_utils.h
@@ -22,14 +22,14 @@ class ModelListUtils {
  private:
   mutable std::mutex mutex_;  // For thread safety
 
-  std::vector<ModelEntry> LoadModelList() const;
-  bool IsUnique(const std::vector<ModelEntry>& entries,
+  bool IsUnique(const std::vector<ModelEntry>& entries,
                 const std::string& model_id,
                 const std::string& model_alias) const;
   void SaveModelList(const std::vector<ModelEntry>& entries) const;
 
  public:
   static const std::string kModelListPath;
+  std::vector<ModelEntry> LoadModelList() const;
   ModelListUtils() = default;
   std::string GenerateShortenedAlias(
       const std::string& model_id,