From 3bc0cd5c0472db21b31744efab64505e0b947c48 Mon Sep 17 00:00:00 2001
From: sangjanai
Date: Mon, 3 Mar 2025 16:42:37 +0700
Subject: [PATCH 1/2] feat: support pulling and loading vision models

---
 engine/config/model_config.h        |  4 ++
 engine/config/yaml_config.cc        | 63 ++++++++++++++++++++---------
 engine/controllers/models.cc        |  4 +-
 engine/services/hardware_service.cc |  2 +-
 engine/services/model_service.cc    |  9 +++++
 5 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index 8d1462590..e95a94278 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -135,6 +135,7 @@ struct ModelConfig {
   bool text_model = std::numeric_limits<bool>::quiet_NaN();
   std::string id;
   std::vector<std::string> files;
+  std::string mmproj;
   std::size_t created;
   std::string object;
   std::string owned_by = "";
@@ -338,6 +339,9 @@ struct ModelConfig {
       files_array.append(file);
     }
     obj["files"] = files_array;
+    if (!mmproj.empty()) {
+      obj["mmproj"] = mmproj;
+    }
     obj["created"] = static_cast<uint64_t>(created);
     obj["object"] = object;

diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index 57b2b3ecb..8d5060615 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -21,11 +21,13 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
   try {
     yaml_node_ = YAML::LoadFile(file_path);
+    auto normalize_path = [](std::string p) {
+      std::replace(p.begin(), p.end(), '\\', '/');
+      return p;
+    };
     // in case of a model.yml file, we don't have files yet, so create them
     if (!yaml_node_["files"]) {
-      auto s = file_path;
-      // normalize path
-      std::replace(s.begin(), s.end(), '\\', '/');
+      auto s = normalize_path(file_path);
       std::vector<std::string> v;
       if (yaml_node_["engine"] &&
           (yaml_node_["engine"].as<std::string>() == kLlamaRepo ||
@@ -41,6 +43,18 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
       // TODO(any) need to support multiple gguf files
       yaml_node_["files"] = v;
     }
+
+    // add the mmproj file to the yml if it exists next to the model file
+    if (!yaml_node_["mmproj"]) {
+      auto s = normalize_path(file_path);
+      auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
+      CTL_DBG("mmproj: " << abs_path);
+      if (std::filesystem::exists(abs_path)) {
+        auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
+        yaml_node_["mmproj"] = rel_path.string();
+      }
+    }
+
   } catch (const YAML::BadFile& e) {
     throw;
   }
@@ -131,6 +145,8 @@ void YamlHandler::ModelConfigFromYaml() {
       tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
     if (yaml_node_["files"])
      tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
+    if (yaml_node_["mmproj"])
+      tmp.mmproj = yaml_node_["mmproj"].as<std::string>();
     if (yaml_node_["created"])
       tmp.created = yaml_node_["created"].as<std::size_t>();
@@ -239,6 +255,9 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
   if (model_config_.files.size() > 0)
     yaml_node_["files"] = model_config_.files;
 
+  if (!model_config_.mmproj.empty())
+    yaml_node_["mmproj"] = model_config_.mmproj;
+
   if (!std::isnan(static_cast<double>(model_config_.seed)))
     yaml_node_["seed"] = model_config_.seed;
   if (!std::isnan(model_config_.dynatemp_range))
@@ -301,17 +320,21 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
         "Model ID which is used for request construct - should be "
         "unique between models (author / quantization)");
     out_file << format_utils::WriteKeyValue("name", yaml_node_["name"],
-                                        "metadata.general.name");
+                                            "metadata.general.name");
     if (yaml_node_["version"]) {
-      out_file << "version: " << yaml_node_["version"].as<std::string>() << "\n";
+      out_file << "version: " << yaml_node_["version"].as<std::string>()
+               << "\n";
    }
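The mmproj auto-detection that ReadYamlFile gains above is a sibling-file check: normalize the path separators of the model YAML, take its directory, and look for an mmproj.gguf beside it. Below is a minimal, self-contained sketch of that rule; the function name and return type are illustrative only, and the real code additionally rewrites the hit into a cortex-relative data path via fmu::ToRelativeCortexDataPath.

#include <algorithm>
#include <filesystem>
#include <optional>
#include <string>

// Illustrative helper: returns the path of an "mmproj.gguf" sitting next to
// the model's YAML file, or std::nullopt when no projector file is present.
std::optional<std::string> DetectMmproj(std::string yaml_path) {
  // normalize Windows separators so find_last_of('/') works on both platforms
  std::replace(yaml_path.begin(), yaml_path.end(), '\\', '/');
  auto dir = yaml_path.substr(0, yaml_path.find_last_of('/'));
  auto candidate = dir + "/mmproj.gguf";
  if (std::filesystem::exists(candidate)) {
    return candidate;
  }
  return std::nullopt;
}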
if (yaml_node_["files"] && yaml_node_["files"].size()) { out_file << "files: # Can be relative OR absolute local file " - "path\n"; + "path\n"; for (const auto& source : yaml_node_["files"]) { out_file << " - " << source << "\n"; } } + if (yaml_node_["mmproj"]) { + out_file << "mmproj: " << yaml_node_["mmproj"].as() << "\n"; + } out_file << "# END GENERAL GGUF METADATA\n"; out_file << "\n"; @@ -330,9 +353,9 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { out_file << "# BEGIN OPTIONAL\n"; out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]); out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"], - "Default true?"); + "Default true?"); out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"], - "Ranges: 0 to 1"); + "Ranges: 0 to 1"); out_file << format_utils::WriteKeyValue( "temperature", yaml_node_["temperature"], "Ranges: 0 to 1"); out_file << format_utils::WriteKeyValue( @@ -344,26 +367,26 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { "Should be default to context length"); out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]); out_file << format_utils::WriteKeyValue("dynatemp_range", - yaml_node_["dynatemp_range"]); + yaml_node_["dynatemp_range"]); out_file << format_utils::WriteKeyValue("dynatemp_exponent", - yaml_node_["dynatemp_exponent"]); + yaml_node_["dynatemp_exponent"]); out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]); out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]); out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]); out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]); out_file << format_utils::WriteKeyValue("repeat_last_n", - yaml_node_["repeat_last_n"]); + yaml_node_["repeat_last_n"]); out_file << format_utils::WriteKeyValue("repeat_penalty", - yaml_node_["repeat_penalty"]); + yaml_node_["repeat_penalty"]); out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]); out_file << format_utils::WriteKeyValue("mirostat_tau", - yaml_node_["mirostat_tau"]); + yaml_node_["mirostat_tau"]); out_file << format_utils::WriteKeyValue("mirostat_eta", - yaml_node_["mirostat_eta"]); + yaml_node_["mirostat_eta"]); out_file << format_utils::WriteKeyValue("penalize_nl", - yaml_node_["penalize_nl"]); + yaml_node_["penalize_nl"]); out_file << format_utils::WriteKeyValue("ignore_eos", - yaml_node_["ignore_eos"]); + yaml_node_["ignore_eos"]); out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]); out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]); out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]); @@ -374,7 +397,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { out_file << "# BEGIN MODEL LOAD PARAMETERS\n"; out_file << "# BEGIN REQUIRED\n"; out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"], - "engine to run model"); + "engine to run model"); out_file << "prompt_template:"; out_file << " " << yaml_node_["prompt_template"] << "\n"; out_file << "# END REQUIRED\n"; @@ -384,11 +407,11 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { "ctx_len", yaml_node_["ctx_len"], "llama.context_length | 0 or undefined = loaded from model"); out_file << format_utils::WriteKeyValue("n_parallel", - yaml_node_["n_parallel"]); + yaml_node_["n_parallel"]); out_file << format_utils::WriteKeyValue("cpu_threads", - yaml_node_["cpu_threads"]); + yaml_node_["cpu_threads"]); out_file << 
format_utils::WriteKeyValue("ngl", yaml_node_["ngl"], - "Undefined = loaded from model"); + "Undefined = loaded from model"); out_file << "# END OPTIONAL\n"; out_file << "# END MODEL LOAD PARAMETERS\n"; diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 86b749ce6..d88efc254 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -533,8 +533,8 @@ void Models::StartModel( auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); std::optional mmproj; - if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) { - mmproj = o.asString(); + if (auto& o = (*(req->getJsonObject())); o.isMember("mmproj")) { + mmproj = o["mmproj"].asString(); } auto bypass_llama_model_path = false; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 972647b51..5ca27ddfb 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -304,7 +304,7 @@ void HardwareService::UpdateHardwareInfos() { }; for (auto const& he : b.value()) { if (!exists(he.uuid)) { - db_service_->DeleteHardwareEntry(he.uuid); + (void)db_service_->DeleteHardwareEntry(he.uuid); } } diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 32f92beee..b0e998536 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -947,6 +947,15 @@ cpp::result ModelService::StartModel( LOG_WARN << "model_path is empty"; return StartModelResult{.success = false}; } + if (!mc.mmproj.empty()) { +#if defined(_WIN32) + json_data["mmproj"] = cortex::wc::WstringToUtf8( + fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).wstring()); +#else + json_data["mmproj"] = + fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string(); +#endif + } json_data["system_prompt"] = mc.system_template; json_data["user_prompt"] = mc.user_template; json_data["ai_prompt"] = mc.ai_template; From d8c9862908dd11d8c11ac0c075358637d1305241 Mon Sep 17 00:00:00 2001 From: sangjanai Date: Wed, 5 Mar 2025 07:17:01 +0700 Subject: [PATCH 2/2] fix: discard metadata for vision models --- engine/services/model_service.cc | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index b0e998536..7ca86ac7c 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -155,8 +155,8 @@ ModelService::ModelService(std::shared_ptr db_service, inference_svc_(inference_service), engine_svc_(engine_svc), task_queue_(task_queue) { - // ProcessBgrTasks(); -}; + // ProcessBgrTasks(); + }; void ModelService::ForceIndexingModelList() { CTL_INF("Force indexing model list"); @@ -1005,16 +1005,18 @@ cpp::result ModelService::StartModel( auto data = std::get<1>(ir); if (status == drogon::k200OK) { - // start model successfully, we store the metadata so we can use + // start model successfully, in case not vision model, we store the metadata so we can use // for each inference - auto metadata_res = GetModelMetadata(model_handle); - if (metadata_res.has_value()) { - loaded_model_metadata_map_.emplace(model_handle, - std::move(metadata_res.value())); - CTL_INF("Successfully stored metadata for model " << model_handle); - } else { - CTL_WRN("Failed to get metadata for model " << model_handle << ": " - << metadata_res.error()); + if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) { + auto metadata_res = GetModelMetadata(model_handle); + if (metadata_res.has_value()) { + 
+          loaded_model_metadata_map_.emplace(model_handle,
+                                             std::move(metadata_res.value()));
+          CTL_INF("Successfully stored metadata for model " << model_handle);
+        } else {
+          CTL_WRN("Failed to get metadata for model " << model_handle << ": "
+                  << metadata_res.error());
+        }
       }
 
       return StartModelResult{.success = true,
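Taken together, the two patches make vision models work end to end: pulling records an mmproj.gguf found next to the weights in model.yml, and starting the model forwards the projector path to the engine. Below is a minimal sketch of a start request body that exercises the new controller path; the model id and projector path are placeholders, and jsoncpp is used only because the controller reads the body through Json::Value.

#include <json/json.h>
#include <iostream>

int main() {
  // Build the JSON body Models::StartModel now inspects. "mmproj" is
  // optional: when absent, ModelService::StartModel still fills it in from
  // the model.yml config if a projector was recorded at pull time.
  Json::Value body;
  body["model"] = "llava-7b";                      // placeholder model id
  body["mmproj"] = "models/llava-7b/mmproj.gguf";  // placeholder projector path
  std::cout << body.toStyledString();  // POST this to the start-model endpoint
  return 0;
}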