From ea15f99db3037a1fc4ed60f730e231187a2bbb1b Mon Sep 17 00:00:00 2001 From: sangjanai Date: Thu, 27 Feb 2025 11:59:02 +0700 Subject: [PATCH 1/2] chore: remove unused fields for remote model config --- engine/config/model_config.h | 21 ---- .../extensions/remote-engine/remote_engine.cc | 115 ++++++------------ .../extensions/remote-engine/remote_engine.h | 3 - 3 files changed, 38 insertions(+), 101 deletions(-) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 1d51cfb01..8d1462590 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -18,16 +18,12 @@ namespace config { struct RemoteModelConfig { std::string model; - std::string header_template; std::string engine; std::string version; size_t created; std::string object = "model"; std::string owned_by = ""; Json::Value inference_params; - Json::Value transform_req; - Json::Value transform_resp; - Json::Value metadata; void LoadFromJson(const Json::Value& json) { if (!json.isObject()) { throw std::runtime_error("Input JSON must be an object"); @@ -35,8 +31,6 @@ struct RemoteModelConfig { // Load basic string fields model = json.get("model", model).asString(); - header_template = - json.get("header_template", header_template).asString(); engine = json.get("engine", engine).asString(); version = json.get("version", version).asString(); created = @@ -46,9 +40,6 @@ struct RemoteModelConfig { // Load JSON object fields directly inference_params = json.get("inference_params", inference_params); - transform_req = json.get("transform_req", transform_req); - transform_resp = json.get("transform_resp", transform_resp); - metadata = json.get("metadata", metadata); } Json::Value ToJson() const { @@ -56,7 +47,6 @@ struct RemoteModelConfig { // Add basic string fields json["model"] = model; - json["header_template"] = header_template; json["engine"] = engine; json["version"] = version; json["created"] = static_cast(created); @@ -65,9 +55,6 @@ struct RemoteModelConfig { // Add JSON object fields directly json["inference_params"] = inference_params; - json["transform_req"] = transform_req; - json["transform_resp"] = transform_resp; - json["metadata"] = metadata; return json; }; @@ -77,7 +64,6 @@ struct RemoteModelConfig { // Convert basic fields root["model"] = model; - root["header_template"] = header_template; root["engine"] = engine; root["version"] = version; root["object"] = object; @@ -87,9 +73,6 @@ struct RemoteModelConfig { // Convert Json::Value to YAML::Node using utility function root["inference_params"] = remote_models_utils::jsonToYaml(inference_params); - root["transform_req"] = remote_models_utils::jsonToYaml(transform_req); - root["transform_resp"] = remote_models_utils::jsonToYaml(transform_resp); - root["metadata"] = remote_models_utils::jsonToYaml(metadata); // Save to file std::ofstream fout(filepath); @@ -110,7 +93,6 @@ struct RemoteModelConfig { // Load basic fields model = root["model"].as(""); - header_template = root["header_template"].as(""); engine = root["engine"].as(""); version = root["version"] ? root["version"].as() : ""; created = root["created"] ? root["created"].as() : 0; @@ -120,9 +102,6 @@ struct RemoteModelConfig { // Load complex fields using utility function inference_params = remote_models_utils::yamlToJson(root["inference_params"]); - transform_req = remote_models_utils::yamlToJson(root["transform_req"]); - transform_resp = remote_models_utils::yamlToJson(root["transform_resp"]); - metadata = remote_models_utils::yamlToJson(root["metadata"]); } }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 1640b7fac..7bd938b69 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -116,11 +116,6 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( } std::string full_url = chat_url_; - - if (config.transform_req["chat_completions"]["url"]) { - full_url = - config.transform_req["chat_completions"]["url"].as(); - } CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; @@ -134,12 +129,6 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( headers = curl_slist_append(headers, "Connection: keep-alive"); std::string stream_template = chat_res_template_; - if (config.transform_resp["chat_completions"] && - config.transform_resp["chat_completions"]["template"]) { - // Model level overrides engine level - stream_template = - config.transform_resp["chat_completions"]["template"].as(); - } StreamContext context{ std::make_shared>( @@ -295,11 +284,6 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( return response; } std::string full_url = chat_url_; - - if (config.transform_req["chat_completions"]["url"]) { - full_url = - config.transform_req["chat_completions"]["url"].as(); - } CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; @@ -341,7 +325,6 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, ModelConfig model_config; model_config.model = model; - model_config.api_key = body["api_key"].asString(); // model_config.url = ; // Optional fields if (auto s = config["header_template"]; s && !s.as().empty()) { @@ -350,16 +333,6 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, CTL_DBG("header: " << h); } } - if (config["transform_req"]) { - model_config.transform_req = config["transform_req"]; - } else { - LOG_WARN << "Missing transform_req in config for model " << model; - } - if (config["transform_resp"]) { - model_config.transform_resp = config["transform_resp"]; - } else { - LOG_WARN << "Missing transform_resp in config for model " << model; - } model_config.is_loaded = true; @@ -414,9 +387,10 @@ void RemoteEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { if (!json_body->isMember("model") || !json_body->isMember("model_path") || - !json_body->isMember("api_key")) { + !json_body->isMember("api_key") || !json_body->isMember("metadata")) { Json::Value error; - error["error"] = "Missing required fields: model or model_path"; + error["error"] = + "Missing required fields: model, model_path, api_key or metadata"; Json::Value status; status["is_done"] = true; status["has_error"] = true; @@ -428,43 +402,41 @@ void RemoteEngine::LoadModel( const std::string& model = (*json_body)["model"].asString(); const std::string& model_path = (*json_body)["model_path"].asString(); - const std::string& api_key = (*json_body)["api_key"].asString(); - - if (json_body->isMember("metadata")) { - metadata_ = (*json_body)["metadata"]; - if (!metadata_["transform_req"].isNull() && - !metadata_["transform_req"]["chat_completions"].isNull() && - !metadata_["transform_req"]["chat_completions"]["template"].isNull()) { - chat_req_template_ = - metadata_["transform_req"]["chat_completions"]["template"].asString(); - CTL_INF(chat_req_template_); - } - if (!metadata_["transform_resp"].isNull() && - !metadata_["transform_resp"]["chat_completions"].isNull() && - !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) { - chat_res_template_ = - metadata_["transform_resp"]["chat_completions"]["template"] - .asString(); - CTL_INF(chat_res_template_); - } + metadata_ = (*json_body)["metadata"]; + if (!metadata_["transform_req"].isNull() && + !metadata_["transform_req"]["chat_completions"].isNull() && + !metadata_["transform_req"]["chat_completions"]["template"].isNull()) { + chat_req_template_ = + metadata_["transform_req"]["chat_completions"]["template"].asString(); + CTL_INF(chat_req_template_); + } else { + CTL_WRN("Required transform_req"); + } - if (!metadata_["transform_req"].isNull() && - !metadata_["transform_req"]["chat_completions"].isNull() && - !metadata_["transform_req"]["chat_completions"]["url"].isNull()) { - chat_url_ = - metadata_["transform_req"]["chat_completions"]["url"].asString(); - CTL_INF(chat_url_); - } + if (!metadata_["transform_resp"].isNull() && + !metadata_["transform_resp"]["chat_completions"].isNull() && + !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) { + chat_res_template_ = + metadata_["transform_resp"]["chat_completions"]["template"].asString(); + CTL_INF(chat_res_template_); + } else { + CTL_WRN("Required transform_resp"); } - if (json_body->isMember("metadata")) { - if (!metadata_["header_template"].isNull()) { - header_ = ReplaceHeaderPlaceholders( - metadata_["header_template"].asString(), *json_body); - for (auto const& h : header_) { - CTL_DBG("header: " << h); - } + if (!metadata_["transform_req"].isNull() && + !metadata_["transform_req"]["chat_completions"].isNull() && + !metadata_["transform_req"]["chat_completions"]["url"].isNull()) { + chat_url_ = + metadata_["transform_req"]["chat_completions"]["url"].asString(); + CTL_INF(chat_url_); + } + + if (!metadata_["header_template"].isNull()) { + header_ = ReplaceHeaderPlaceholders(metadata_["header_template"].asString(), + *json_body); + for (auto const& h : header_) { + CTL_DBG("header: " << h); } } @@ -568,13 +540,8 @@ void RemoteEngine::HandleChatCompletion( if (!chat_req_template_.empty()) { CTL_DBG("Use engine transform request template: " << chat_req_template_); template_str = chat_req_template_; - } - if (model_config->transform_req["chat_completions"] && - model_config->transform_req["chat_completions"]["template"]) { - // Model level overrides engine level - template_str = model_config->transform_req["chat_completions"]["template"] - .as(); - CTL_DBG("Use model transform request template: " << template_str); + } else { + CTL_WRN("Required transform request template"); } // Render with error handling @@ -634,14 +601,8 @@ void RemoteEngine::HandleChatCompletion( CTL_DBG( "Use engine transform response template: " << chat_res_template_); template_str = chat_res_template_; - } - if (model_config->transform_resp["chat_completions"] && - model_config->transform_resp["chat_completions"]["template"]) { - // Model level overrides engine level - template_str = - model_config->transform_resp["chat_completions"]["template"] - .as(); - CTL_DBG("Use model transform request template: " << template_str); + } else { + CTL_WRN("Required transform response template"); } try { diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 6f1b731c6..90ca1397f 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -40,10 +40,7 @@ class RemoteEngine : public RemoteEngineI { struct ModelConfig { std::string model; std::string version; - std::string api_key; std::string url; - YAML::Node transform_req; - YAML::Node transform_resp; bool is_loaded{false}; }; From 50d0f1054435ba25f20e1e737139dbcf26e7f148 Mon Sep 17 00:00:00 2001 From: sangjanai Date: Thu, 27 Feb 2025 15:47:39 +0700 Subject: [PATCH 2/2] fix: model path --- engine/services/model_service.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index c13f7cf19..4062cff10 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -369,10 +369,6 @@ ModelService::EstimateModel(const std::string& model_handle, CTL_WRN("Error: " + model_entry.error()); return cpp::fail(model_entry.error()); } - auto file_path = fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.value().path_to_model_yaml)) - .parent_path() / - "model.gguf"; yaml_handler.ModelConfigFromFile( fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) @@ -389,13 +385,14 @@ ModelService::EstimateModel(const std::string& model_handle, free_vram_MiB = hw_info.ram.available_MiB; #endif - return hardware::EstimateLLaMACppRun(file_path.string(), - {.ngl = mc.ngl, - .ctx_len = mc.ctx_len, - .n_batch = n_batch, - .n_ubatch = n_ubatch, - .kv_cache_type = kv_cache, - .free_vram_MiB = free_vram_MiB}); + return hardware::EstimateLLaMACppRun( + fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string(), + {.ngl = mc.ngl, + .ctx_len = mc.ctx_len, + .n_batch = n_batch, + .n_ubatch = n_ubatch, + .kv_cache_type = kv_cache, + .free_vram_MiB = free_vram_MiB}); } catch (const std::exception& e) { return cpp::fail("Fail to get model status with ID '" + model_handle + "': " + e.what()); @@ -1437,5 +1434,5 @@ void ModelService::ProcessBgrTasks() { auto clone = cb; task_queue_.RunInQueue(std::move(cb)); - task_queue_.RunEvery(std::chrono::seconds(10), std::move(clone)); + task_queue_.RunEvery(std::chrono::seconds(60), std::move(clone)); } \ No newline at end of file