From bc2b6b831f5d7f2f175704b47d71247a1e6bfc92 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 18 Dec 2024 20:17:35 +0700 Subject: [PATCH 01/20] chore: convention --- docs/static/openapi/cortex.json | 8 ++--- engine/config/model_config.h | 32 +++++++++---------- .../extensions/remote-engine/remote_engine.cc | 28 ++++++++-------- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index a05f8b24e..28fe5a9ec 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -4858,8 +4858,8 @@ "engine", "version", "inference_params", - "TransformReq", - "TransformResp", + "transform_req", + "transform_resp", "metadata" ], "properties": { @@ -4902,7 +4902,7 @@ } } }, - "TransformReq": { + "transform_req": { "type": "object", "properties": { "get_models": { @@ -4924,7 +4924,7 @@ } } }, - "TransformResp": { + "transform_resp": { "type": "object", "properties": { "chat_completions": { diff --git a/engine/config/model_config.h b/engine/config/model_config.h index a799adb27..abe08dbf2 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -23,8 +23,8 @@ struct RemoteModelConfig { std::string object = "model"; std::string owned_by = ""; Json::Value inference_params; - Json::Value TransformReq; - Json::Value TransformResp; + Json::Value transform_req; + Json::Value transform_resp; Json::Value metadata; void LoadFromJson(const Json::Value& json) { if (!json.isObject()) { @@ -44,27 +44,27 @@ struct RemoteModelConfig { // Load JSON object fields directly inference_params = json.get("inference_params", inference_params); - TransformReq = json.get("TransformReq", TransformReq); + transform_req = json.get("transform_req", transform_req); // Use default template if it is empty, currently we only support 2 remote engines auto is_anthropic = [](const std::string& model) { return model.find("claude") != std::string::npos; }; - if (TransformReq["chat_completions"]["template"].isNull()) { + if (transform_req["chat_completions"]["template"].isNull()) { if (is_anthropic(model)) { - TransformReq["chat_completions"]["template"] = + transform_req["chat_completions"]["template"] = kAnthropicTransformReqTemplate; } else { - TransformReq["chat_completions"]["template"] = + transform_req["chat_completions"]["template"] = kOpenAITransformReqTemplate; } } - TransformResp = json.get("TransformResp", TransformResp); - if (TransformResp["chat_completions"]["template"].isNull()) { + transform_resp = json.get("transform_resp", transform_resp); + if (transform_resp["chat_completions"]["template"].isNull()) { if (is_anthropic(model)) { - TransformResp["chat_completions"]["template"] = + transform_resp["chat_completions"]["template"] = kAnthropicTransformRespTemplate; } else { - TransformResp["chat_completions"]["template"] = + transform_resp["chat_completions"]["template"] = kOpenAITransformRespTemplate; } } @@ -86,8 +86,8 @@ struct RemoteModelConfig { // Add JSON object fields directly json["inference_params"] = inference_params; - json["TransformReq"] = TransformReq; - json["TransformResp"] = TransformResp; + json["transform_req"] = transform_req; + json["transform_resp"] = transform_resp; json["metadata"] = metadata; return json; @@ -108,8 +108,8 @@ struct RemoteModelConfig { // Convert Json::Value to YAML::Node using utility function root["inference_params"] = remote_models_utils::jsonToYaml(inference_params); - root["TransformReq"] = remote_models_utils::jsonToYaml(TransformReq); - root["TransformResp"] = remote_models_utils::jsonToYaml(TransformResp); + root["transform_req"] = remote_models_utils::jsonToYaml(transform_req); + root["transform_resp"] = remote_models_utils::jsonToYaml(transform_resp); root["metadata"] = remote_models_utils::jsonToYaml(metadata); // Save to file @@ -141,8 +141,8 @@ struct RemoteModelConfig { // Load complex fields using utility function inference_params = remote_models_utils::yamlToJson(root["inference_params"]); - TransformReq = remote_models_utils::yamlToJson(root["TransformReq"]); - TransformResp = remote_models_utils::yamlToJson(root["TransformResp"]); + transform_req = remote_models_utils::yamlToJson(root["transform_req"]); + transform_resp = remote_models_utils::yamlToJson(root["transform_resp"]); metadata = remote_models_utils::yamlToJson(root["metadata"]); } }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 6361077dd..a48f993fb 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -313,15 +313,15 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, api_key_template_ = ReplaceApiKeyPlaceholder( config["api_key_template"].as(), api_key); } - if (config["TransformReq"]) { - model_config.transform_req = config["TransformReq"]; + if (config["transform_req"]) { + model_config.transform_req = config["transform_req"]; } else { - LOG_WARN << "Missing TransformReq in config for model " << model; + LOG_WARN << "Missing transform_req in config for model " << model; } - if (config["TransformResp"]) { - model_config.transform_resp = config["TransformResp"]; + if (config["transform_resp"]) { + model_config.transform_resp = config["transform_resp"]; } else { - LOG_WARN << "Missing TransformResp in config for model " << model; + LOG_WARN << "Missing transform_resp in config for model " << model; } model_config.is_loaded = true; @@ -406,19 +406,19 @@ void RemoteEngine::LoadModel( } if (json_body->isMember("metadata")) { metadata_ = (*json_body)["metadata"]; - if (!metadata_["TransformReq"].isNull() && - !metadata_["TransformReq"]["chat_completions"].isNull() && - !metadata_["TransformReq"]["chat_completions"]["template"].isNull()) { + if (!metadata_["transform_req"].isNull() && + !metadata_["transform_req"]["chat_completions"].isNull() && + !metadata_["transform_req"]["chat_completions"]["template"].isNull()) { chat_req_template_ = - metadata_["TransformReq"]["chat_completions"]["template"].asString(); + metadata_["transform_req"]["chat_completions"]["template"].asString(); CTL_INF(chat_req_template_); } - if (!metadata_["TransformResp"].isNull() && - !metadata_["TransformResp"]["chat_completions"].isNull() && - !metadata_["TransformResp"]["chat_completions"]["template"].isNull()) { + if (!metadata_["transform_resp"].isNull() && + !metadata_["transform_resp"]["chat_completions"].isNull() && + !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) { chat_res_template_ = - metadata_["TransformResp"]["chat_completions"]["template"].asString(); + metadata_["transform_resp"]["chat_completions"]["template"].asString(); CTL_INF(chat_res_template_); } } From bd57ed4e88e1be7dbf341dd49361179fdb414a39 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 08:31:09 +0700 Subject: [PATCH 02/20] fix: correct get remote model list --- engine/controllers/engines.cc | 87 +++++++++++-------- engine/controllers/engines.h | 6 ++ engine/cortex-common/remote_enginei.h | 3 +- .../extensions/remote-engine/remote_engine.cc | 22 ++--- .../extensions/remote-engine/remote_engine.h | 6 +- engine/services/engine_service.cc | 16 ++-- 6 files changed, 87 insertions(+), 53 deletions(-) diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index a92d6805f..67faa0ec6 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -3,6 +3,7 @@ #include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/engine_constants.h" +#include "utils/http_util.h" #include "utils/logging_utils.h" #include "utils/string_utils.h" @@ -173,21 +174,57 @@ void Engines::InstallEngine( norm_version = version; } - if ((req->getJsonObject()) && - (*(req->getJsonObject())).get("type", "").asString() == "remote") { - auto type = (*(req->getJsonObject())).get("type", "").asString(); - auto api_key = (*(req->getJsonObject())).get("api_key", "").asString(); - auto url = (*(req->getJsonObject())).get("url", "").asString(); + auto result = + engine_service_->InstallEngineAsync(engine, norm_version, norm_variant); + if (result.has_error()) { + Json::Value res; + res["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + CTL_INF("Error: " << result.error()); + callback(resp); + } else { + Json::Value res; + res["message"] = "Engine starts installing!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k200OK); + CTL_INF("Engine starts installing!"); + callback(resp); + } +} + +void Engines::InstallRemoteEngine( + const HttpRequestPtr& req, + std::function&& callback) { + if (!http_util::HasFieldInReq(req, callback, "engine")) { + return; + } + std::optional norm_variant = std::nullopt; + std::string norm_version{"latest"}; + + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); + + if (!variant.empty()) { + norm_variant = variant; + } + norm_version = version; + } + + if (auto o = req->getJsonObject(); o) { + auto engine = (*o).get("engine", "").asString(); + auto type = (*o).get("type", "").asString(); + auto api_key = (*o).get("api_key", "").asString(); + auto url = (*o).get("url", "").asString(); auto variant = norm_variant.value_or("all-platforms"); - auto status = (*(req->getJsonObject())).get("status", "Default").asString(); + auto status = (*o).get("status", "Default").asString(); std::string metadata; - if ((*(req->getJsonObject())).isMember("metadata") && - (*(req->getJsonObject()))["metadata"].isObject()) { - metadata = (*(req->getJsonObject())) - .get("metadata", Json::Value(Json::objectValue)) - .toStyledString(); - } else if ((*(req->getJsonObject())).isMember("metadata") && - !(*(req->getJsonObject()))["metadata"].isObject()) { + if ((*o).isMember("metadata") && (*o)["metadata"].isObject()) { + metadata = + (*o).get("metadata", Json::Value(Json::objectValue)).toStyledString(); + } else if ((*o).isMember("metadata") && !(*o)["metadata"].isObject()) { Json::Value res; res["message"] = "metadata must be object"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); @@ -196,8 +233,7 @@ void Engines::InstallEngine( return; } - auto get_models_url = (*(req->getJsonObject())) - .get("metadata", Json::Value(Json::objectValue)) + auto get_models_url = (*o).get("metadata", Json::Value(Json::objectValue)) .get("get_models_url", "") .asString(); @@ -250,25 +286,6 @@ void Engines::InstallEngine( resp->setStatusCode(k200OK); callback(resp); } - return; - } - - auto result = - engine_service_->InstallEngineAsync(engine, norm_version, norm_variant); - if (result.has_error()) { - Json::Value res; - res["message"] = result.error(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode(k400BadRequest); - CTL_INF("Error: " << result.error()); - callback(resp); - } else { - Json::Value res; - res["message"] = "Engine starts installing!"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); - resp->setStatusCode(k200OK); - CTL_INF("Engine starts installing!"); - callback(resp); } } @@ -298,6 +315,8 @@ void Engines::UpdateEngine( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) { + // Check if it is remote engine + auto result = engine_service_->UpdateEngine(engine); if (result.has_error()) { Json::Value res; diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index b0a92b6c3..7391a2cdc 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -16,6 +16,8 @@ class Engines : public drogon::HttpController { METHOD_ADD(Engines::InstallEngine, "/{1}/install", Options, Post); ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/{1}/install", Options, Post); + METHOD_ADD(Engines::InstallRemoteEngine, "/engines", Options, Post); + ADD_METHOD_TO(Engines::InstallRemoteEngine, "/v1/engines", Options, Post); // uninstall engine METHOD_ADD(Engines::UninstallEngine, "/{1}/install", Options, Delete); @@ -70,6 +72,10 @@ class Engines : public drogon::HttpController { std::function&& callback, const std::string& engine); + void InstallRemoteEngine( + const HttpRequestPtr& req, + std::function&& callback); + void UninstallEngine(const HttpRequestPtr& req, std::function&& callback, const std::string& engine); diff --git a/engine/cortex-common/remote_enginei.h b/engine/cortex-common/remote_enginei.h index 81ffbf5cd..5b71bfe04 100644 --- a/engine/cortex-common/remote_enginei.h +++ b/engine/cortex-common/remote_enginei.h @@ -33,5 +33,6 @@ class RemoteEngineI { std::function&& callback) = 0; // Get available remote models - virtual Json::Value GetRemoteModels() = 0; + virtual Json::Value GetRemoteModels(const std::string& url, + const std::string& api_key) = 0; }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index a48f993fb..d4d0902bd 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -199,7 +199,8 @@ RemoteEngine::ModelConfig* RemoteEngine::GetModelConfig( return nullptr; } -CurlResponse RemoteEngine::MakeGetModelsRequest() { +CurlResponse RemoteEngine::MakeGetModelsRequest(const std::string& url, + const std::string& api_key) { CURL* curl = curl_easy_init(); CurlResponse response; @@ -209,13 +210,12 @@ CurlResponse RemoteEngine::MakeGetModelsRequest() { return response; } - std::string full_url = metadata_["get_models_url"].asString(); - + std::string api_key_header = "Authorization: Bearer " + api_key; struct curl_slist* headers = nullptr; - headers = curl_slist_append(headers, api_key_template_.c_str()); + headers = curl_slist_append(headers, api_key_header.c_str()); headers = curl_slist_append(headers, "Content-Type: application/json"); - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); std::string response_string; @@ -418,7 +418,8 @@ void RemoteEngine::LoadModel( !metadata_["transform_resp"]["chat_completions"].isNull() && !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) { chat_res_template_ = - metadata_["transform_resp"]["chat_completions"]["template"].asString(); + metadata_["transform_resp"]["chat_completions"]["template"] + .asString(); CTL_INF(chat_res_template_); } } @@ -686,9 +687,9 @@ void RemoteEngine::HandleEmbedding( callback(Json::Value(), Json::Value()); } -Json::Value RemoteEngine::GetRemoteModels() { - if (metadata_["get_models_url"].isNull() || - metadata_["get_models_url"].asString().empty()) { +Json::Value RemoteEngine::GetRemoteModels(const std::string& url, + const std::string& api_key) { + if (url.empty()) { if (engine_name_ == kAnthropicEngine) { Json::Value json_resp; Json::Value model_array(Json::arrayValue); @@ -709,10 +710,11 @@ Json::Value RemoteEngine::GetRemoteModels() { return Json::Value(); } } else { - auto response = MakeGetModelsRequest(); + auto response = MakeGetModelsRequest(url, api_key); if (response.error) { Json::Value error; error["error"] = response.error_message; + CTL_WRN(response.error_message); return error; } Json::Value response_json; diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index d8dfbad61..40198f966 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -60,7 +60,8 @@ class RemoteEngine : public RemoteEngineI { CurlResponse MakeStreamingChatCompletionRequest( const ModelConfig& config, const std::string& body, const std::function& callback); - CurlResponse MakeGetModelsRequest(); + CurlResponse MakeGetModelsRequest(const std::string& url, + const std::string& api_key); // Internal model management bool LoadModelConfig(const std::string& model, const std::string& yaml_path, @@ -97,7 +98,8 @@ class RemoteEngine : public RemoteEngineI { std::shared_ptr json_body, std::function&& callback) override; - Json::Value GetRemoteModels() override; + Json::Value GetRemoteModels(const std::string& url, + const std::string& api_key) override; }; } // namespace remote_engine \ No newline at end of file diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index bdd080f50..b82dd374f 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1061,17 +1061,21 @@ cpp::result EngineService::GetRemoteModels( return cpp::fail(r.error()); } + auto exist_engine = GetEngineByNameAndVariant(engine_name); + if (exist_engine.has_error()) { + return cpp::fail("Remote engine '" + engine_name + "' is not installed"); + } + if (!IsEngineLoaded(engine_name)) { - auto exist_engine = GetEngineByNameAndVariant(engine_name); - if (exist_engine.has_error()) { - return cpp::fail("Remote engine '" + engine_name + "' is not installed"); - } engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); CTL_INF("Loaded engine: " << engine_name); } - auto& e = std::get(engines_[engine_name].engine); - auto res = e->GetRemoteModels(); + auto remote_engine_json = exist_engine.value().ToJson(); + auto& e = std::get(engines_[engine_name].engine); + auto url = remote_engine_json["metadata"]["get_models_url"].asString(); + auto api_key = remote_engine_json["api_key"].asString(); + auto res = e->GetRemoteModels(url, api_key); if (!res["error"].isNull()) { return cpp::fail(res["error"].asString()); } else { From 8e14e7e425476f7e81df7d9ef1544e02bf5ffa32 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 12:45:53 +0700 Subject: [PATCH 03/20] feat: auto generate remote model config --- engine/config/model_config.h | 2 +- engine/config/remote_template.h | 41 +++++++++++++++++++ engine/controllers/engines.cc | 9 ++++- engine/controllers/models.cc | 7 ++-- engine/services/engine_service.cc | 66 ++++++++++++++++++++++++++++++- engine/services/engine_service.h | 5 ++- 6 files changed, 122 insertions(+), 8 deletions(-) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index abe08dbf2..264a19d5a 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -19,7 +19,7 @@ struct RemoteModelConfig { std::string api_key_template; std::string engine; std::string version; - std::size_t created; + size_t created; std::string object = "model"; std::string owned_by = ""; Json::Value inference_params; diff --git a/engine/config/remote_template.h b/engine/config/remote_template.h index 8a17aaa9a..169bb854c 100644 --- a/engine/config/remote_template.h +++ b/engine/config/remote_template.h @@ -63,4 +63,45 @@ const std::string kAnthropicTransformRespTemplate = R"({ "system_fingerprint": "fp_6b68a8204b" })"; +const std::string kDefaultRemoteModelConfig = R"( +{ + "model": "o1-preview", + "api_key_template": "Authorization: Bearer {{api_key}}", + "engine": "openai", + "version": "1", + "inference_params": { + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "max_tokens": 4096, + "stream": true + }, + "transform_req": { + "get_models": { + "url": "https://api.openai.com/v1/models" + }, + "chat_completions": { + "url": "https://api.openai.com/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + }, + "embeddings": { + "url": "https://api.openai.com/v1/embeddings", + "template": "{\"input\": {{tojson(input)}}, \"model\": \"text-embedding-ada-002\"}" + } + }, + "transform_resp": { + "chat_completions": { + "template":"{ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == \"id\" or key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"object\" or key == \"usage\" -%} {%- if not first -%},{%- endif -%} \"{{ key }}\": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} }" + }, + "embeddings": {} + }, + "metadata": { + "author": "OpenAI", + "description": "GPT-4 is a large language model by OpenAI", + "end_point": "https://api.openai.com/v1/chat/completions", + "logo": "https://i.pinimg.com/564x/08/ea/94/08ea94ca94a4b3a04037bdfc335ae00d.jpg", + "api_key_url": "https://platform.openai.com/api-keys" + } +})"; } // namespace config \ No newline at end of file diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 67faa0ec6..20d081f8d 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -5,6 +5,7 @@ #include "utils/engine_constants.h" #include "utils/http_util.h" #include "utils/logging_utils.h" +#include "utils/scope_exit.h" #include "utils/string_utils.h" namespace { @@ -213,8 +214,9 @@ void Engines::InstallRemoteEngine( norm_version = version; } + std::string engine; if (auto o = req->getJsonObject(); o) { - auto engine = (*o).get("engine", "").asString(); + engine = (*o).get("engine", "").asString(); auto type = (*o).get("type", "").asString(); auto api_key = (*o).get("api_key", "").asString(); auto url = (*o).get("url", "").asString(); @@ -275,6 +277,11 @@ void Engines::InstallRemoteEngine( resp->setStatusCode(k400BadRequest); callback(resp); } else { + auto gr = engine_service_->GenerateRemoteModel(engine); + if (gr.has_error()) { + CTL_INF("Error: " << gr.error()); + } + Json::Value res; if (get_models_url.empty()) { res["warning"] = diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 59793b2a6..5ab26012d 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -668,7 +668,7 @@ void Models::AddRemoteModel( auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); auto engine_name = (*(req->getJsonObject())).get("engine", "").asString(); - + auto engine_validate = engine_service_->IsEngineReady(engine_name); if (engine_validate.has_error()) { Json::Value ret; @@ -687,7 +687,7 @@ void Models::AddRemoteModel( callback(resp); return; } - + config::RemoteModelConfig model_config; model_config.LoadFromJson(*(req->getJsonObject())); cortex::db::Models modellist_utils_obj; @@ -699,11 +699,10 @@ void Models::AddRemoteModel( // Use relative path for model_yaml_path. In case of import, we use absolute path for model auto yaml_rel_path = fmu::ToRelativeCortexDataPath(fs::path(model_yaml_path)); - // TODO: remove hardcode "openai" when engine is finish cortex::db::ModelEntry model_entry{ model_handle, "", "", yaml_rel_path.string(), model_handle, "remote", "imported", cortex::db::ModelStatus::Remote, - "openai"}; + engine_name}; std::filesystem::create_directories( std::filesystem::path(model_yaml_path).parent_path()); if (modellist_utils_obj.AddModelEntry(model_entry).value()) { diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index b82dd374f..5956132b3 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -5,7 +5,9 @@ #include #include #include "algorithm" +#include "config/model_config.h" #include "database/engines.h" +#include "database/models.h" #include "extensions/remote-engine/remote_engine.h" #include "utils/archive_utils.h" #include "utils/engine_constants.h" @@ -1072,7 +1074,7 @@ cpp::result EngineService::GetRemoteModels( CTL_INF("Loaded engine: " << engine_name); } auto remote_engine_json = exist_engine.value().ToJson(); - auto& e = std::get(engines_[engine_name].engine); + auto& e = std::get(engines_[engine_name].engine); auto url = remote_engine_json["metadata"]["get_models_url"].asString(); auto api_key = remote_engine_json["api_key"].asString(); auto res = e->GetRemoteModels(url, api_key); @@ -1092,6 +1094,68 @@ bool EngineService::IsRemoteEngine(const std::string& engine_name) { } return true; } +cpp::result EngineService::GenerateRemoteModel( + const std::string& engine_name) { + namespace fmu = file_manager_utils; + namespace fs = std::filesystem; + auto exist_engine = GetEngineByNameAndVariant(engine_name); + if (exist_engine.has_error()) { + return cpp::fail("Remote engine '" + engine_name + "' is not installed"); + } + + if (!IsEngineLoaded(engine_name)) { + engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); + CTL_INF("Loaded engine: " << engine_name); + } + + auto remote_engine_json = exist_engine.value().ToJson(); + auto& e = std::get(engines_[engine_name].engine); + auto url = remote_engine_json["metadata"]["get_models_url"].asString(); + auto api_key = remote_engine_json["api_key"].asString(); + auto res = e->GetRemoteModels(url, api_key); + if (!res["error"].isNull()) { + return cpp::fail(res["error"].asString()); + } else { + for (auto& d : res["data"]) { + auto model_handle = d["id"].asString(); + config::RemoteModelConfig model_config; + Json::Value body = + json_helper::ParseJsonString(config::kDefaultRemoteModelConfig); + body["model"] = model_handle; + body["engine"] = engine_name; + // CTL_INF(body.toStyledString()); + model_config.LoadFromJson(body); + cortex::db::Models modellist_utils_obj; + + std::string model_yaml_path = + (file_manager_utils::GetModelsContainerPath() / + std::filesystem::path("remote") / + std::filesystem::path(model_handle + ".yml")) + .string(); + try { + auto yaml_rel_path = + fmu::ToRelativeCortexDataPath(fs::path(model_yaml_path)); + cortex::db::ModelEntry model_entry{ + model_handle, "", "", yaml_rel_path.string(), + model_handle, "remote", "imported", cortex::db::ModelStatus::Remote, + engine_name}; + std::filesystem::create_directories( + std::filesystem::path(model_yaml_path).parent_path()); + if (modellist_utils_obj.AddModelEntry(model_entry).value()) { + model_config.SaveToYamlFile(model_yaml_path); + } else { + CTL_INF("Fail to import model, model_id '" + model_handle + + "' already exists!"); + } + } catch (const std::exception& e) { + return cpp::fail("Error while adding Remote model with model_id '" + + model_handle + "': " + e.what()); + } + } + } + + return true; +} cpp::result, std::string> EngineService::GetSupportedEngineNames() { diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 527123cb5..38f7341ca 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -137,7 +137,7 @@ class EngineService : public EngineServiceI { cpp::result GetEngineByNameAndVariant( const std::string& engine_name, - const std::optional variant = std::nullopt); + const std::optional variant = std::nullopt) override; cpp::result UpsertEngine( const std::string& engine_name, const std::string& type, @@ -155,6 +155,9 @@ class EngineService : public EngineServiceI { bool IsRemoteEngine(const std::string& engine_name) override; + cpp::result GenerateRemoteModel( + const std::string& engine_name); + private: bool IsEngineLoaded(const std::string& engine); From 1075a68176222ddc79de611a0581fbda40baa9bc Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 13:19:48 +0700 Subject: [PATCH 04/20] feat: support update remote engine --- engine/controllers/engines.cc | 59 ++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 20d081f8d..c193c992c 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -322,7 +322,64 @@ void Engines::UpdateEngine( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) { - // Check if it is remote engine + + if (engine_service_->IsRemoteEngine(engine)) { + auto exist_engine = engine_service_->GetEngineByNameAndVariant(engine); + // only allow 1 variant 1 version of a remote engine name + if (!exist_engine) { + Json::Value res; + res["message"] = "Remote engine '" + engine + "' is not installed"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + if (auto o = req->getJsonObject(); o) { + auto type = (*o).get("type", (*exist_engine).type).asString(); + auto api_key = (*o).get("api_key", (*exist_engine).api_key).asString(); + auto url = (*o).get("url", (*exist_engine).url).asString(); + auto status = (*o).get("status", (*exist_engine).status).asString(); + auto version = (*o).get("version", "latest").asString(); + std::string metadata; + if ((*o).isMember("metadata") && (*o)["metadata"].isObject()) { + metadata = (*o).get("metadata", Json::Value(Json::objectValue)) + .toStyledString(); + } else if ((*o).isMember("metadata") && !(*o)["metadata"].isObject()) { + Json::Value res; + res["message"] = "metadata must be object"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } else { + metadata = (*exist_engine).metadata; + } + + auto upd_res = + engine_service_->UpsertEngine(engine, type, api_key, url, version, + "all-platforms", status, metadata); + if (upd_res.has_error()) { + Json::Value res; + res["message"] = upd_res.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } else { + Json::Value res; + res["message"] = "Remote Engine update successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k200OK); + callback(resp); + } + } else { + Json::Value res; + res["message"] = "Request body is empty!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + } + } + return; + } auto result = engine_service_->UpdateEngine(engine); if (result.has_error()) { From 5c19de755995f3f9ab0f396ec1301f699a98a250 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 13:46:51 +0700 Subject: [PATCH 05/20] fix: do not generate remote model --- engine/controllers/engines.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index c193c992c..c3f10fd48 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -277,10 +277,10 @@ void Engines::InstallRemoteEngine( resp->setStatusCode(k400BadRequest); callback(resp); } else { - auto gr = engine_service_->GenerateRemoteModel(engine); - if (gr.has_error()) { - CTL_INF("Error: " << gr.error()); - } + // auto gr = engine_service_->GenerateRemoteModel(engine); + // if (gr.has_error()) { + // CTL_INF("Error: " << gr.error()); + // } Json::Value res; if (get_models_url.empty()) { From ea81b18f70093f76db0c314fc7a8b957d0ce54b2 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 14:31:21 +0700 Subject: [PATCH 06/20] chore: change engine_name to engine --- engine/database/engines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/database/engines.h b/engine/database/engines.h index 7429d0fa2..1312a9c67 100644 --- a/engine/database/engines.h +++ b/engine/database/engines.h @@ -27,7 +27,7 @@ struct EngineEntry { // Convert basic fields root["id"] = id; - root["engine_name"] = engine_name; + root["engine"] = engine_name; root["type"] = type; root["api_key"] = api_key; root["url"] = url; From 965a0c8ef4f47e71ffdd67afb9bd685cf92786dc Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 15:09:20 +0700 Subject: [PATCH 07/20] fix: api key template on engine level --- engine/cortex-common/remote_enginei.h | 3 +- .../extensions/remote-engine/remote_engine.cc | 50 ++++++++++++------- .../extensions/remote-engine/remote_engine.h | 8 +-- engine/services/engine_service.cc | 9 +++- 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/engine/cortex-common/remote_enginei.h b/engine/cortex-common/remote_enginei.h index 5b71bfe04..190758541 100644 --- a/engine/cortex-common/remote_enginei.h +++ b/engine/cortex-common/remote_enginei.h @@ -34,5 +34,6 @@ class RemoteEngineI { // Get available remote models virtual Json::Value GetRemoteModels(const std::string& url, - const std::string& api_key) = 0; + const std::string& api_key, + const std::string& api_key_template) = 0; }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index d4d0902bd..e60c9b947 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -107,7 +107,7 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( struct curl_slist* headers = nullptr; if (!config.api_key.empty()) { - headers = curl_slist_append(headers, api_key_template_.c_str()); + headers = curl_slist_append(headers, api_key_header_.c_str()); } if (is_anthropic(config.model)) { @@ -199,8 +199,9 @@ RemoteEngine::ModelConfig* RemoteEngine::GetModelConfig( return nullptr; } -CurlResponse RemoteEngine::MakeGetModelsRequest(const std::string& url, - const std::string& api_key) { +CurlResponse RemoteEngine::MakeGetModelsRequest( + const std::string& url, const std::string& api_key, + const std::string& api_key_template) { CURL* curl = curl_easy_init(); CurlResponse response; @@ -210,7 +211,9 @@ CurlResponse RemoteEngine::MakeGetModelsRequest(const std::string& url, return response; } - std::string api_key_header = "Authorization: Bearer " + api_key; + std::string api_key_header = + ReplaceApiKeyPlaceholder(api_key_template, api_key); + struct curl_slist* headers = nullptr; headers = curl_slist_append(headers, api_key_header.c_str()); headers = curl_slist_append(headers, "Content-Type: application/json"); @@ -251,7 +254,7 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( struct curl_slist* headers = nullptr; if (!config.api_key.empty()) { - headers = curl_slist_append(headers, api_key_template_.c_str()); + headers = curl_slist_append(headers, api_key_header_.c_str()); } if (is_anthropic(config.model)) { @@ -310,7 +313,7 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, // model_config.url = ; // Optional fields if (config["api_key_template"]) { - api_key_template_ = ReplaceApiKeyPlaceholder( + api_key_header_ = ReplaceApiKeyPlaceholder( config["api_key_template"].as(), api_key); } if (config["transform_req"]) { @@ -393,17 +396,6 @@ void RemoteEngine::LoadModel( const std::string& model_path = (*json_body)["model_path"].asString(); const std::string& api_key = (*json_body)["api_key"].asString(); - if (!LoadModelConfig(model, model_path, api_key)) { - Json::Value error; - error["error"] = "Failed to load model configuration"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - callback(std::move(status), std::move(error)); - return; - } if (json_body->isMember("metadata")) { metadata_ = (*json_body)["metadata"]; if (!metadata_["transform_req"].isNull() && @@ -424,6 +416,25 @@ void RemoteEngine::LoadModel( } } + if (!LoadModelConfig(model, model_path, api_key)) { + Json::Value error; + error["error"] = "Failed to load model configuration"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = k500InternalServerError; + callback(std::move(status), std::move(error)); + return; + } + + if (json_body->isMember("metadata")) { + if (!metadata_["api_key_template"].isNull()) { + api_key_header_ = ReplaceApiKeyPlaceholder( + metadata_["api_key_template"].asString(), api_key); + } + } + Json::Value response; response["status"] = "Model loaded successfully"; Json::Value status; @@ -688,7 +699,8 @@ void RemoteEngine::HandleEmbedding( } Json::Value RemoteEngine::GetRemoteModels(const std::string& url, - const std::string& api_key) { + const std::string& api_key, + const std::string& api_key_template) { if (url.empty()) { if (engine_name_ == kAnthropicEngine) { Json::Value json_resp; @@ -710,7 +722,7 @@ Json::Value RemoteEngine::GetRemoteModels(const std::string& url, return Json::Value(); } } else { - auto response = MakeGetModelsRequest(url, api_key); + auto response = MakeGetModelsRequest(url, api_key, api_key_template); if (response.error) { Json::Value error; error["error"] = response.error_message; diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 40198f966..8478af817 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -50,7 +50,7 @@ class RemoteEngine : public RemoteEngineI { Json::Value metadata_; std::string chat_req_template_; std::string chat_res_template_; - std::string api_key_template_; + std::string api_key_header_; std::string engine_name_; // Helper functions @@ -61,7 +61,8 @@ class RemoteEngine : public RemoteEngineI { const ModelConfig& config, const std::string& body, const std::function& callback); CurlResponse MakeGetModelsRequest(const std::string& url, - const std::string& api_key); + const std::string& api_key, + const std::string& api_key_template); // Internal model management bool LoadModelConfig(const std::string& model, const std::string& yaml_path, @@ -99,7 +100,8 @@ class RemoteEngine : public RemoteEngineI { std::function&& callback) override; Json::Value GetRemoteModels(const std::string& url, - const std::string& api_key) override; + const std::string& api_key, + const std::string& api_key_template) override; }; } // namespace remote_engine \ No newline at end of file diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 5956132b3..4923ab387 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1077,7 +1077,9 @@ cpp::result EngineService::GetRemoteModels( auto& e = std::get(engines_[engine_name].engine); auto url = remote_engine_json["metadata"]["get_models_url"].asString(); auto api_key = remote_engine_json["api_key"].asString(); - auto res = e->GetRemoteModels(url, api_key); + auto api_key_template = + remote_engine_json["metadata"]["api_key_template"].asString(); + auto res = e->GetRemoteModels(url, api_key, api_key_template); if (!res["error"].isNull()) { return cpp::fail(res["error"].asString()); } else { @@ -1094,6 +1096,7 @@ bool EngineService::IsRemoteEngine(const std::string& engine_name) { } return true; } + cpp::result EngineService::GenerateRemoteModel( const std::string& engine_name) { namespace fmu = file_manager_utils; @@ -1112,7 +1115,9 @@ cpp::result EngineService::GenerateRemoteModel( auto& e = std::get(engines_[engine_name].engine); auto url = remote_engine_json["metadata"]["get_models_url"].asString(); auto api_key = remote_engine_json["api_key"].asString(); - auto res = e->GetRemoteModels(url, api_key); + auto api_key_template = + remote_engine_json["metadata"]["api_key_template"].asString(); + auto res = e->GetRemoteModels(url, api_key, api_key_template); if (!res["error"].isNull()) { return cpp::fail(res["error"].asString()); } else { From 0139fd2b10441e84cc43f1e598adcc0e58452f5b Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 19 Dec 2024 16:19:38 +0700 Subject: [PATCH 08/20] fix: add type for local engine --- engine/common/engine_servicei.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/engine/common/engine_servicei.h b/engine/common/engine_servicei.h index a4b0c8732..17f5d3de2 100644 --- a/engine/common/engine_servicei.h +++ b/engine/common/engine_servicei.h @@ -31,6 +31,7 @@ struct EngineVariantResponse { root["name"] = name; root["version"] = version; root["engine"] = engine; + root["type"] = "local"; return root; } }; @@ -58,6 +59,6 @@ class EngineServiceI { GetEngineByNameAndVariant( const std::string& engine_name, const std::optional variant = std::nullopt) = 0; - + virtual bool IsRemoteEngine(const std::string& engine_name) = 0; }; From 5da3540c6c82309b0010ecce65884ada5eacfc3d Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Sat, 28 Dec 2024 05:37:56 +0700 Subject: [PATCH 09/20] chore: cleanup --- engine/controllers/engines.cc | 5 -- .../extensions/remote-engine/remote_engine.cc | 39 +++++------ .../extensions/remote-engine/remote_engine.h | 2 + engine/services/engine_service.cc | 65 ------------------- engine/services/engine_service.h | 3 - engine/services/model_service.cc | 3 + engine/test/components/test_remote_engine.cc | 48 +++++++++++++- 7 files changed, 68 insertions(+), 97 deletions(-) diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 46f35046f..3b3725089 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -289,11 +289,6 @@ void Engines::InstallRemoteEngine( resp->setStatusCode(k400BadRequest); callback(resp); } else { - // auto gr = engine_service_->GenerateRemoteModel(engine); - // if (gr.has_error()) { - // CTL_INF("Error: " << gr.error()); - // } - Json::Value res; if (get_models_url.empty()) { res["warning"] = diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index e60c9b947..6c7b7881c 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -16,10 +16,6 @@ bool is_anthropic(const std::string& model) { return model.find("claude") != std::string::npos; } -bool is_openai(const std::string& model) { - return model.find("gpt") != std::string::npos; -} - constexpr const std::array kAnthropicModels = { "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", @@ -59,23 +55,20 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, // Parse the JSON Json::Value chunk_json; - if (!is_openai(context->model)) { - std::string s = line.substr(6); - try { - auto root = json_helper::ParseJsonString(s); - root["model"] = context->model; - root["id"] = context->id; - root["stream"] = true; - auto result = context->renderer.Render(context->stream_template, root); - CTL_DBG(result); - chunk_json["data"] = "data: " + result + "\n\n"; - } catch (const std::exception& e) { - CTL_WRN("JSON parse error: " << e.what()); - continue; - } - } else { - chunk_json["data"] = line + "\n\n"; + std::string s = line.substr(6); + try { + auto root = json_helper::ParseJsonString(s); + root["model"] = context->model; + root["id"] = context->id; + root["stream"] = true; + auto result = context->renderer.Render(context->stream_template, root); + CTL_DBG(result); + chunk_json["data"] = "data: " + result + "\n\n"; + } catch (const std::exception& e) { + CTL_WRN("JSON parse error: " << e.what()); + continue; } + Json::Reader reader; Json::Value status; @@ -181,7 +174,7 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, } RemoteEngine::RemoteEngine(const std::string& engine_name) - : engine_name_(engine_name) { + : engine_name_(engine_name), q_(1 /*n_parallel*/, engine_name) { curl_global_init(CURL_GLOBAL_ALL); } @@ -552,7 +545,9 @@ void RemoteEngine::HandleChatCompletion( } if (is_stream) { - MakeStreamingChatCompletionRequest(*model_config, result, callback); + q_.runTaskInQueue([this, model_config, result, cb = std::move(callback)] { + MakeStreamingChatCompletionRequest(*model_config, result, cb); + }); } else { auto response = MakeChatCompletionRequest(*model_config, result); diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 8478af817..6f5259f39 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -9,6 +9,7 @@ #include #include "cortex-common/remote_enginei.h" #include "extensions/remote-engine/template_renderer.h" +#include "trantor/utils/ConcurrentTaskQueue.h" #include "utils/engine_constants.h" #include "utils/file_logger.h" // Helper for CURL response @@ -52,6 +53,7 @@ class RemoteEngine : public RemoteEngineI { std::string chat_res_template_; std::string api_key_header_; std::string engine_name_; + trantor::ConcurrentTaskQueue q_; // Helper functions CurlResponse MakeChatCompletionRequest(const ModelConfig& config, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 3bbb6df82..73212a048 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1140,71 +1140,6 @@ bool EngineService::IsRemoteEngine(const std::string& engine_name) { return true; } -cpp::result EngineService::GenerateRemoteModel( - const std::string& engine_name) { - namespace fmu = file_manager_utils; - namespace fs = std::filesystem; - auto exist_engine = GetEngineByNameAndVariant(engine_name); - if (exist_engine.has_error()) { - return cpp::fail("Remote engine '" + engine_name + "' is not installed"); - } - - if (!IsEngineLoaded(engine_name)) { - engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); - CTL_INF("Loaded engine: " << engine_name); - } - - auto remote_engine_json = exist_engine.value().ToJson(); - auto& e = std::get(engines_[engine_name].engine); - auto url = remote_engine_json["metadata"]["get_models_url"].asString(); - auto api_key = remote_engine_json["api_key"].asString(); - auto api_key_template = - remote_engine_json["metadata"]["api_key_template"].asString(); - auto res = e->GetRemoteModels(url, api_key, api_key_template); - if (!res["error"].isNull()) { - return cpp::fail(res["error"].asString()); - } else { - for (auto& d : res["data"]) { - auto model_handle = d["id"].asString(); - config::RemoteModelConfig model_config; - Json::Value body = - json_helper::ParseJsonString(config::kDefaultRemoteModelConfig); - body["model"] = model_handle; - body["engine"] = engine_name; - // CTL_INF(body.toStyledString()); - model_config.LoadFromJson(body); - cortex::db::Models modellist_utils_obj; - - std::string model_yaml_path = - (file_manager_utils::GetModelsContainerPath() / - std::filesystem::path("remote") / - std::filesystem::path(model_handle + ".yml")) - .string(); - try { - auto yaml_rel_path = - fmu::ToRelativeCortexDataPath(fs::path(model_yaml_path)); - cortex::db::ModelEntry model_entry{ - model_handle, "", "", yaml_rel_path.string(), - model_handle, "remote", "imported", cortex::db::ModelStatus::Remote, - engine_name}; - std::filesystem::create_directories( - std::filesystem::path(model_yaml_path).parent_path()); - if (modellist_utils_obj.AddModelEntry(model_entry).value()) { - model_config.SaveToYamlFile(model_yaml_path); - } else { - CTL_INF("Fail to import model, model_id '" + model_handle + - "' already exists!"); - } - } catch (const std::exception& e) { - return cpp::fail("Error while adding Remote model with model_id '" + - model_handle + "': " + e.what()); - } - } - } - - return true; -} - cpp::result, std::string> EngineService::GetSupportedEngineNames() { return file_manager_utils::GetCortexConfig().supportedEngines; diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index ca652a7bd..8ead4f6d6 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -152,9 +152,6 @@ class EngineService : public EngineServiceI { bool IsRemoteEngine(const std::string& engine_name) override; - cpp::result GenerateRemoteModel( - const std::string& engine_name); - private: bool IsEngineLoaded(const std::string& engine); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 0d909b61f..8c0d2cd66 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -1253,5 +1253,8 @@ ModelService::GetModelMetadata(const std::string& model_id) const { std::shared_ptr ModelService::GetCachedModelMetadata( const std::string& model_id) const { + if (loaded_model_metadata_map_.find(model_id) == + loaded_model_metadata_map_.end()) + return nullptr; return loaded_model_metadata_map_.at(model_id); } diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc index bfac76f49..eed6e195f 100644 --- a/engine/test/components/test_remote_engine.cc +++ b/engine/test/components/test_remote_engine.cc @@ -25,19 +25,22 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { {% endfor %} ] {% endif %} + {% if not loop.is_last %},{% endif %} {% else if key == "system" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} "{{ key }}": {{ tojson(value) }} + {% if not loop.is_last %},{% endif %} {% endif %} - {% if not loop.is_last %},{% endif %} {% endfor %} })"; { std::string message_with_system = R"({ + "engine" : "anthropic", + "max_tokens" : 1024, "messages": [ {"role": "system", "content": "You are a seasoned data scientist at a Fortune 500 company."}, {"role": "user", "content": "Hello, world"} ], "model": "claude-3-5-sonnet-20241022", - "max_tokens": 1024, + "stream" : true })"; auto data = json_helper::ParseJsonString(message_with_system); @@ -78,4 +81,45 @@ TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { EXPECT_EQ(data["messages"][0]["content"].asString(), res_json["messages"][0]["content"].asString()); } +} + +TEST_F(RemoteEngineTest, OpenAiResponse) { + std::string tpl = R"({ + {% set first = true %} + {% for key, value in input_request %} + {% if key == "choices" or key == "created" or key == "model" or key == "service_tier" or key == "system_fingerprint" or key == "stream" or key == "object" or key == "usage" %} + {% if not first %},{% endif %} + "{{ key }}": {{ tojson(value) }} + {% set first = false %} + {% endif %} + {% endfor %} + })"; + std::string message = R"( + { + "choices": [ + { + "delta": { + "content": " questions" + }, + "finish_reason": null, + "index": 0 + } + ], + "created": 1735372587, + "id": "", + "model": "o1-preview", + "object": "chat.completion.chunk", + "stream": true, + "system_fingerprint": "fp_1ddf0263de" + })"; + auto data = json_helper::ParseJsonString(message); + + remote_engine::TemplateRenderer rdr; + auto res = rdr.Render(tpl, data); + + auto res_json = json_helper::ParseJsonString(res); + EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); + EXPECT_EQ(data["created"].asInt(), res_json["created"].asInt()); + EXPECT_EQ(data["choices"][0]["delta"]["content"].asString(), + res_json["choices"][0]["delta"]["content"].asString()); } \ No newline at end of file From 8922896c7355620fb52a75dc161043f8b7541a5c Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 30 Dec 2024 16:46:55 +0700 Subject: [PATCH 10/20] fix: add remote engine to /v1/engines GET --- engine/common/engine_servicei.h | 7 ++- engine/controllers/engines.cc | 18 ++++++ .../extensions/remote-engine/remote_engine.cc | 55 ++++++++----------- engine/services/engine_service.cc | 5 +- engine/services/engine_service.h | 4 +- 5 files changed, 49 insertions(+), 40 deletions(-) diff --git a/engine/common/engine_servicei.h b/engine/common/engine_servicei.h index 17f5d3de2..ceb9b2fec 100644 --- a/engine/common/engine_servicei.h +++ b/engine/common/engine_servicei.h @@ -25,13 +25,14 @@ struct EngineVariantResponse { std::string name; std::string version; std::string engine; + std::string type; Json::Value ToJson() const { Json::Value root; root["name"] = name; root["version"] = version; root["engine"] = engine; - root["type"] = "local"; + root["type"] = type.empty() ? "local" : type; return root; } }; @@ -58,7 +59,7 @@ class EngineServiceI { virtual cpp::result GetEngineByNameAndVariant( const std::string& engine_name, - const std::optional variant = std::nullopt) = 0; + const std::optional variant = std::nullopt) const = 0; - virtual bool IsRemoteEngine(const std::string& engine_name) = 0; + virtual bool IsRemoteEngine(const std::string& engine_name) const = 0; }; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 3b3725089..8cf98785e 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -307,6 +307,24 @@ void Engines::GetInstalledEngineVariants( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const { + + if (engine_service_->IsRemoteEngine(engine)) { + auto remote_engines = engine_service_->GetEngines(); + Json::Value releases(Json::arrayValue); + if (remote_engines.has_value()) { + for (auto e : remote_engines.value()) { + if (e.type == kRemote && e.engine_name == engine) { + releases.append(e.ToJson()); + break; + } + } + } + auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases); + resp->setStatusCode(k200OK); + callback(resp); + return; + } + auto result = engine_service_->GetInstalledEngineVariants(engine); if (result.has_error()) { Json::Value res; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 6c7b7881c..83d81c9c5 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -69,8 +69,6 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, continue; } - Json::Reader reader; - Json::Value status; status["is_done"] = false; status["has_error"] = false; @@ -114,6 +112,12 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( headers = curl_slist_append(headers, "Connection: keep-alive"); std::string stream_template = chat_res_template_; + if (!config.transform_resp["chat_completions"] && + !config.transform_resp["chat_completions"]["template"]) { + // Model level overrides engine level + stream_template = + config.transform_resp["chat_completions"]["template"].as(); + } StreamContext context{ std::make_shared>( @@ -522,12 +526,16 @@ void RemoteEngine::HandleChatCompletion( // Get template string with error check std::string template_str; - try { + if (!chat_req_template_.empty()) { + CTL_DBG("Use engine transform request template: " << chat_req_template_); + template_str = chat_req_template_; + } + if (!model_config->transform_req["chat_completions"] && + !model_config->transform_req["chat_completions"]["template"]) { + // Model level overrides engine level template_str = model_config->transform_req["chat_completions"]["template"] .as(); - } catch (const YAML::BadConversion& e) { - throw std::runtime_error("Failed to convert template node to string: " + - std::string(e.what())); + CTL_DBG("Use model transform request template: " << template_str); } // Render with error handling @@ -586,33 +594,14 @@ void RemoteEngine::HandleChatCompletion( CTL_DBG( "Use engine transform response template: " << chat_res_template_); template_str = chat_res_template_; - } else { - // Check if required YAML nodes exist - if (!model_config->transform_resp["chat_completions"]) { - throw std::runtime_error( - "Missing 'chat_completions' node in transform_resp"); - } - if (!model_config->transform_resp["chat_completions"]["template"]) { - throw std::runtime_error( - "Missing 'template' node in chat_completions"); - } - - // Validate JSON body - if (!response_json || response_json.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } - - // Get template string with error check - - try { - template_str = - model_config->transform_resp["chat_completions"]["template"] - .as(); - } catch (const YAML::BadConversion& e) { - throw std::runtime_error( - "Failed to convert template node to string: " + - std::string(e.what())); - } + } + if (!model_config->transform_resp["chat_completions"] && + !model_config->transform_resp["chat_completions"]["template"]) { + // Model level overrides engine level + template_str = + model_config->transform_resp["chat_completions"]["template"] + .as(); + CTL_DBG("Use model transform request template: " << template_str); } try { diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 73212a048..c9bb8da56 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1057,7 +1057,8 @@ cpp::result EngineService::GetEngineById( cpp::result EngineService::GetEngineByNameAndVariant( - const std::string& engine_name, const std::optional variant) { + const std::string& engine_name, + const std::optional variant) const { cortex::db::Engines engines; auto get_res = engines.GetEngineByNameAndVariant(engine_name, variant); @@ -1130,7 +1131,7 @@ cpp::result EngineService::GetRemoteModels( } } -bool EngineService::IsRemoteEngine(const std::string& engine_name) { +bool EngineService::IsRemoteEngine(const std::string& engine_name) const { auto ne = Repo2Engine(engine_name); auto local_engines = file_manager_utils::GetCortexConfig().supportedEngines; for (auto const& le : local_engines) { diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 8ead4f6d6..d3d1180dd 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -134,7 +134,7 @@ class EngineService : public EngineServiceI { cpp::result GetEngineByNameAndVariant( const std::string& engine_name, - const std::optional variant = std::nullopt) override; + const std::optional variant = std::nullopt) const override; cpp::result UpsertEngine( const std::string& engine_name, const std::string& type, @@ -150,7 +150,7 @@ class EngineService : public EngineServiceI { void RegisterEngineLibPath(); - bool IsRemoteEngine(const std::string& engine_name) override; + bool IsRemoteEngine(const std::string& engine_name) const override; private: bool IsEngineLoaded(const std::string& engine); From 3ee663eba4230a0e4e54c12a23b0e9a715f43e16 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 3 Jan 2025 12:14:20 +0700 Subject: [PATCH 11/20] fix: build --- engine/test/components/test_remote_engine.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc index b91411b7e..0d0529f87 100644 --- a/engine/test/components/test_remote_engine.cc +++ b/engine/test/components/test_remote_engine.cc @@ -114,12 +114,12 @@ TEST_F(RemoteEngineTest, OpenAiResponse) { })"; auto data = json_helper::ParseJsonString(message); - remote_engine::TemplateRenderer rdr; + extensions::TemplateRenderer rdr; auto res = rdr.Render(tpl, data); auto res_json = json_helper::ParseJsonString(res); - EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); - EXPECT_EQ(data["created"].asInt(), res_json["created"].asInt()); - EXPECT_EQ(data["choices"][0]["delta"]["content"].asString(), - res_json["choices"][0]["delta"]["content"].asString()); + EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); + EXPECT_EQ(data["created"].asInt(), res_json["created"].asInt()); + EXPECT_EQ(data["choices"][0]["delta"]["content"].asString(), + res_json["choices"][0]["delta"]["content"].asString()); } \ No newline at end of file From 3948b3966a36592b2fb42daef8bcbe253a593cea Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 3 Jan 2025 15:25:54 +0700 Subject: [PATCH 12/20] fix: load engine when start model --- engine/extensions/remote-engine/remote_engine.cc | 8 ++++---- engine/services/engine_service.cc | 8 +++++--- engine/services/model_service.cc | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 83d81c9c5..ff62787da 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -530,8 +530,8 @@ void RemoteEngine::HandleChatCompletion( CTL_DBG("Use engine transform request template: " << chat_req_template_); template_str = chat_req_template_; } - if (!model_config->transform_req["chat_completions"] && - !model_config->transform_req["chat_completions"]["template"]) { + if (model_config->transform_req["chat_completions"] && + model_config->transform_req["chat_completions"]["template"]) { // Model level overrides engine level template_str = model_config->transform_req["chat_completions"]["template"] .as(); @@ -595,8 +595,8 @@ void RemoteEngine::HandleChatCompletion( "Use engine transform response template: " << chat_res_template_); template_str = chat_res_template_; } - if (!model_config->transform_resp["chat_completions"] && - !model_config->transform_resp["chat_completions"]["template"]) { + if (model_config->transform_resp["chat_completions"] && + model_config->transform_resp["chat_completions"]["template"]) { // Model level overrides engine level template_str = model_config->transform_resp["chat_completions"]["template"] diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 645817be8..4a935fd20 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -736,9 +736,11 @@ cpp::result EngineService::LoadEngine( return cpp::fail("Remote engine '" + engine_name + "' is not installed"); } - engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); - - CTL_INF("Loaded engine: " << engine_name); + if (!IsEngineLoaded(engine_name)) { + engines_[engine_name].engine = + new remote_engine::RemoteEngine(engine_name); + CTL_INF("Loaded engine: " << engine_name); + } return {}; } diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index eb62118c3..799eeadf4 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -900,7 +900,7 @@ cpp::result ModelService::StartModel( // Running remote model if (engine_svc_->IsRemoteEngine(mc.engine)) { - + engine_svc_->LoadEngine(mc.engine); config::RemoteModelConfig remote_mc; remote_mc.LoadFromYamlFile( fmu::ToAbsoluteCortexDataPath( From 06198161c5eef2d708ad9f42f0bb0bcd399e9ba9 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 3 Jan 2025 16:14:04 +0700 Subject: [PATCH 13/20] chore: add log --- .../extensions/remote-engine/remote_engine.cc | 38 +++++++++++++------ .../extensions/remote-engine/remote_engine.h | 1 + engine/services/engine_service.cc | 6 +++ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index ff62787da..2e11c780b 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -93,8 +93,13 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( return response; } - std::string full_url = - config.transform_req["chat_completions"]["url"].as(); + std::string full_url = chat_url_; + + if (config.transform_req["chat_completions"]["url"]) { + full_url = + config.transform_req["chat_completions"]["url"].as(); + } + CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; if (!config.api_key.empty()) { @@ -246,8 +251,13 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( response.error_message = "Failed to initialize CURL"; return response; } - std::string full_url = - config.transform_req["chat_completions"]["url"].as(); + std::string full_url = chat_url_; + + if (config.transform_req["chat_completions"]["url"]) { + full_url = + config.transform_req["chat_completions"]["url"].as(); + } + CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; if (!config.api_key.empty()) { @@ -411,6 +421,14 @@ void RemoteEngine::LoadModel( .asString(); CTL_INF(chat_res_template_); } + + if (!metadata_["transform_req"].isNull() && + !metadata_["transform_req"]["chat_completions"].isNull() && + !metadata_["transform_req"]["chat_completions"]["url"].isNull()) { + chat_url_ = + metadata_["transform_req"]["chat_completions"]["url"].asString(); + CTL_INF(chat_url_); + } } if (!LoadModelConfig(model, model_path, api_key)) { @@ -713,14 +731,12 @@ Json::Value RemoteEngine::GetRemoteModels(const std::string& url, CTL_WRN(response.error_message); return error; } - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value error; - error["error"] = "Failed to parse response"; - return error; + CTL_DBG(response.body); + auto body_json = json_helper::ParseJsonString(response.body); + if (body_json.isMember("error")) { + return body_json["error"]; } - return response_json; + return body_json; } } diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 856256211..8b98c0a1a 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -53,6 +53,7 @@ class RemoteEngine : public RemoteEngineI { std::string chat_res_template_; std::string api_key_header_; std::string engine_name_; + std::string chat_url_; trantor::ConcurrentTaskQueue q_; // Helper functions diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 4a935fd20..9c0c2b544 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1142,6 +1142,12 @@ cpp::result EngineService::GetRemoteModels( auto api_key = remote_engine_json["api_key"].asString(); auto api_key_template = remote_engine_json["metadata"]["api_key_template"].asString(); + if (url.empty()) + CTL_WRN("url is empty"); + if (api_key.empty()) + CTL_WRN("api_key is empty"); + if (api_key_template.empty()) + CTL_WRN("api_key_template is empty"); auto res = e->GetRemoteModels(url, api_key, api_key_template); if (!res["error"].isNull()) { return cpp::fail(res["error"].asString()); From 68954522e7d51e064c5cd33a8fdb4933d47f1c62 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Sat, 4 Jan 2025 10:52:59 +0700 Subject: [PATCH 14/20] fix: ignore chat_completions in model --- engine/extensions/remote-engine/remote_engine.cc | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 2e11c780b..0f19a3631 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -117,8 +117,8 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( headers = curl_slist_append(headers, "Connection: keep-alive"); std::string stream_template = chat_res_template_; - if (!config.transform_resp["chat_completions"] && - !config.transform_resp["chat_completions"]["template"]) { + if (config.transform_resp["chat_completions"] && + config.transform_resp["chat_completions"]["template"]) { // Model level overrides engine level stream_template = config.transform_resp["chat_completions"]["template"].as(); @@ -528,15 +528,6 @@ void RemoteEngine::HandleChatCompletion( // Transform request std::string result; try { - // Check if required YAML nodes exist - if (!model_config->transform_req["chat_completions"]) { - throw std::runtime_error( - "Missing 'chat_completions' node in transform_req"); - } - if (!model_config->transform_req["chat_completions"]["template"]) { - throw std::runtime_error("Missing 'template' node in chat_completions"); - } - // Validate JSON body if (!json_body || json_body->isNull()) { throw std::runtime_error("Invalid or null JSON body"); From 94b6285e34cfa14db1558f3be52808729d9b4b8f Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Sun, 5 Jan 2025 10:26:03 +0700 Subject: [PATCH 15/20] fix: delete remote model --- engine/config/model_config.h | 23 ------------------- .../extensions/remote-engine/remote_engine.cc | 6 ++++- engine/services/model_service.cc | 3 ++- 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 708509808..a1eed59e8 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -48,30 +48,7 @@ struct RemoteModelConfig { // Load JSON object fields directly inference_params = json.get("inference_params", inference_params); transform_req = json.get("transform_req", transform_req); - // Use default template if it is empty, currently we only support 2 remote engines - auto is_anthropic = [](const std::string& model) { - return model.find("claude") != std::string::npos; - }; - if (transform_req["chat_completions"]["template"].isNull()) { - if (is_anthropic(model)) { - transform_req["chat_completions"]["template"] = - kAnthropicTransformReqTemplate; - } else { - transform_req["chat_completions"]["template"] = - kOpenAITransformReqTemplate; - } - } transform_resp = json.get("transform_resp", transform_resp); - if (transform_resp["chat_completions"]["template"].isNull()) { - if (is_anthropic(model)) { - transform_resp["chat_completions"]["template"] = - kAnthropicTransformRespTemplate; - } else { - transform_resp["chat_completions"]["template"] = - kOpenAITransformRespTemplate; - } - } - metadata = json.get("metadata", metadata); } diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 0f19a3631..dddfc2ff7 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -55,9 +55,13 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, // Parse the JSON Json::Value chunk_json; - std::string s = line.substr(6); + std::string s = line; + if (line.size() > 6) + s = line.substr(6); try { auto root = json_helper::ParseJsonString(s); + if (root.getMemberNames().empty()) + continue; root["model"] = context->model; root["id"] = context->id; root["stream"] = true; diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 799eeadf4..5790c4a90 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -778,7 +778,8 @@ cpp::result ModelService::DeleteModel( // Remove yaml file std::filesystem::remove(yaml_fp); // Remove model files if they are not imported locally - if (model_entry.value().branch_name != "imported") { + if (model_entry.value().branch_name != "imported" && + !engine_svc_->IsRemoteEngine(mc.engine)) { if (mc.files.size() > 0) { if (mc.engine == kLlamaRepo || mc.engine == kLlamaEngine) { for (auto& file : mc.files) { From 5e889cd1fd19fcb8c71744bc3b09fb9cd9b30228 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 7 Jan 2025 15:27:55 +0700 Subject: [PATCH 16/20] fix: replace api_key_template by header_template --- docs/static/openapi/cortex.json | 8 +- engine/config/model_config.h | 12 +- engine/config/remote_template.h | 2 +- engine/cortex-common/remote_enginei.h | 2 +- .../extensions/remote-engine/remote_engine.cc | 162 +++++++++++++----- .../extensions/remote-engine/remote_engine.h | 8 +- engine/services/engine_service.cc | 10 +- engine/services/model_service.cc | 4 + 8 files changed, 142 insertions(+), 66 deletions(-) diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 1f0a8c0ef..2deb15e5e 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -5397,9 +5397,9 @@ "type": "string", "description": "The identifier of the model." }, - "api_key_template": { + "header_template": { "type": "string", - "description": "Template for the API key header." + "description": "Template for the header." }, "engine": { "type": "string", @@ -6162,9 +6162,9 @@ "description": "Number of GPU layers.", "example": 33 }, - "api_key_template": { + "header_template": { "type": "string", - "description": "Template for the API key header." + "description": "Template for the header." }, "version": { "type": "string", diff --git a/engine/config/model_config.h b/engine/config/model_config.h index a1eed59e8..ce84ad63c 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -19,7 +19,7 @@ namespace config { struct RemoteModelConfig { std::string model; - std::string api_key_template; + std::string header_template; std::string engine; std::string version; size_t created; @@ -36,8 +36,8 @@ struct RemoteModelConfig { // Load basic string fields model = json.get("model", model).asString(); - api_key_template = - json.get("api_key_template", api_key_template).asString(); + header_template = + json.get("header_template", header_template).asString(); engine = json.get("engine", engine).asString(); version = json.get("version", version).asString(); created = @@ -57,7 +57,7 @@ struct RemoteModelConfig { // Add basic string fields json["model"] = model; - json["api_key_template"] = api_key_template; + json["header_template"] = header_template; json["engine"] = engine; json["version"] = version; json["created"] = static_cast(created); @@ -78,7 +78,7 @@ struct RemoteModelConfig { // Convert basic fields root["model"] = model; - root["api_key_template"] = api_key_template; + root["header_template"] = header_template; root["engine"] = engine; root["version"] = version; root["object"] = object; @@ -111,7 +111,7 @@ struct RemoteModelConfig { // Load basic fields model = root["model"].as(""); - api_key_template = root["api_key_template"].as(""); + header_template = root["header_template"].as(""); engine = root["engine"].as(""); version = root["version"] ? root["version"].as() : ""; created = root["created"] ? root["created"].as() : 0; diff --git a/engine/config/remote_template.h b/engine/config/remote_template.h index 169bb854c..8f6cb50c2 100644 --- a/engine/config/remote_template.h +++ b/engine/config/remote_template.h @@ -66,7 +66,7 @@ const std::string kAnthropicTransformRespTemplate = R"({ const std::string kDefaultRemoteModelConfig = R"( { "model": "o1-preview", - "api_key_template": "Authorization: Bearer {{api_key}}", + "header_template": "Authorization: Bearer {{api_key}}", "engine": "openai", "version": "1", "inference_params": { diff --git a/engine/cortex-common/remote_enginei.h b/engine/cortex-common/remote_enginei.h index 190758541..835f526a0 100644 --- a/engine/cortex-common/remote_enginei.h +++ b/engine/cortex-common/remote_enginei.h @@ -35,5 +35,5 @@ class RemoteEngineI { // Get available remote models virtual Json::Value GetRemoteModels(const std::string& url, const std::string& api_key, - const std::string& api_key_template) = 0; + const std::string& header_template) = 0; }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index dddfc2ff7..037815b19 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -1,6 +1,7 @@ #include "remote_engine.h" #include #include +#include #include #include #include "utils/json_helper.h" @@ -16,6 +17,79 @@ bool is_anthropic(const std::string& model) { return model.find("claude") != std::string::npos; } +std::vector GetReplacements(const std::string& header_template) { + std::vector replacements; + std::regex placeholder_regex(R"(\{\{(.*?)\}\})"); + std::smatch match; + + std::string template_copy = header_template; + while (std::regex_search(template_copy, match, placeholder_regex)) { + std::string key = match[1].str(); + replacements.push_back(key); + template_copy = match.suffix().str(); + } + + return replacements; +} + +std::vector ReplaceHeaderPlaceholder( + const std::string& header_template, + const std::unordered_map& replacements) { + std::vector result; + size_t start = 0; + size_t end = header_template.find("}}"); + + while (end != std::string::npos) { + // Extract the part + std::string part = header_template.substr(start, end - start + 2); + + // Replace variables in this part + for (const auto& var : replacements) { + std::string placeholder = "{{" + var.first + "}}"; + size_t pos = part.find(placeholder); + if (pos != std::string::npos) { + part.replace(pos, placeholder.length(), var.second); + } + } + + // Trim whitespace + part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); + part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); + + // Add to result if not empty + if (!part.empty()) { + result.push_back(part); + } + + // Move to next part + start = end + 2; + end = header_template.find("}}", start); + } + + // Process any remaining part + if (start < header_template.length()) { + std::string part = header_template.substr(start); + + // Replace variables in this part + for (const auto& var : replacements) { + std::string placeholder = "{{" + var.first + "}}"; + size_t pos = part.find(placeholder); + if (pos != std::string::npos) { + part.replace(pos, placeholder.length(), var.second); + } + } + + // Trim whitespace + part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); + part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); + + if (!part.empty()) { + result.push_back(part); + } + } + return result; +} + constexpr const std::array kAnthropicModels = { "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229", @@ -29,7 +103,7 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, std::string chunk(ptr, size * nmemb); context->buffer += chunk; - + CTL_DBG(chunk); // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { @@ -106,13 +180,8 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; - if (!config.api_key.empty()) { - headers = curl_slist_append(headers, api_key_header_.c_str()); - } - - if (is_anthropic(config.model)) { - std::string v = "anthropic-version: " + config.version; - headers = curl_slist_append(headers, v.c_str()); + for (auto const& h : header_) { + headers = curl_slist_append(headers, h.c_str()); } headers = curl_slist_append(headers, "Content-Type: application/json"); @@ -180,6 +249,21 @@ std::string ReplaceApiKeyPlaceholder(const std::string& templateStr, return result; } +std::vector ReplaceHeaderPlaceholder( + const std::string& template_str, Json::Value json_body) { + CTL_DBG(template_str); + auto keys = GetReplacements(template_str); + if (keys.empty()) + return std::vector{}; + std::unordered_map replacements; + for (auto const& k : keys) { + if (json_body.isMember(k)) { + replacements.insert({k, json_body[k].asString()}); + } + } + return ReplaceHeaderPlaceholder(template_str, replacements); +} + static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, std::string* data) { data->append(ptr, size * nmemb); @@ -207,7 +291,7 @@ RemoteEngine::ModelConfig* RemoteEngine::GetModelConfig( CurlResponse RemoteEngine::MakeGetModelsRequest( const std::string& url, const std::string& api_key, - const std::string& api_key_template) { + const std::string& header_template) { CURL* curl = curl_easy_init(); CurlResponse response; @@ -218,7 +302,7 @@ CurlResponse RemoteEngine::MakeGetModelsRequest( } std::string api_key_header = - ReplaceApiKeyPlaceholder(api_key_template, api_key); + ReplaceApiKeyPlaceholder(header_template, api_key); struct curl_slist* headers = nullptr; headers = curl_slist_append(headers, api_key_header.c_str()); @@ -264,14 +348,10 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; - if (!config.api_key.empty()) { - headers = curl_slist_append(headers, api_key_header_.c_str()); + for (auto const& h : header_) { + headers = curl_slist_append(headers, h.c_str()); } - if (is_anthropic(config.model)) { - std::string v = "anthropic-version: " + config.version; - headers = curl_slist_append(headers, v.c_str()); - } headers = curl_slist_append(headers, "Content-Type: application/json"); curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); @@ -300,32 +380,21 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( bool RemoteEngine::LoadModelConfig(const std::string& model, const std::string& yaml_path, - const std::string& api_key) { + const Json::Value& body) { try { YAML::Node config = YAML::LoadFile(yaml_path); ModelConfig model_config; model_config.model = model; - if (is_anthropic(model)) { - if (!config["version"]) { - CTL_ERR("Missing version for model: " << model); - return false; - } - model_config.version = config["version"].as(); - } - - // Required fields - if (!config["api_key_template"]) { - LOG_ERROR << "Missing required fields in config for model " << model; - return false; - } - - model_config.api_key = api_key; + model_config.api_key = body["api_key"].asString(); // model_config.url = ; // Optional fields - if (config["api_key_template"]) { - api_key_header_ = ReplaceApiKeyPlaceholder( - config["api_key_template"].as(), api_key); + if (auto s = config["header_template"]; + s && !s.as().empty()) { + header_ = ReplaceHeaderPlaceholder(s.as(), body); + for (auto const& h : header_) { + CTL_DBG("header: " << h); + } } if (config["transform_req"]) { model_config.transform_req = config["transform_req"]; @@ -435,7 +504,17 @@ void RemoteEngine::LoadModel( } } - if (!LoadModelConfig(model, model_path, api_key)) { + if (json_body->isMember("metadata")) { + if (!metadata_["header_template"].isNull()) { + header_ = ReplaceHeaderPlaceholder( + metadata_["header_template"].asString(), *json_body); + for (auto const& h : header_) { + CTL_DBG("header: " << h); + } + } + } + + if (!LoadModelConfig(model, model_path, *json_body)) { Json::Value error; error["error"] = "Failed to load model configuration"; Json::Value status; @@ -447,13 +526,6 @@ void RemoteEngine::LoadModel( return; } - if (json_body->isMember("metadata")) { - if (!metadata_["api_key_template"].isNull()) { - api_key_header_ = ReplaceApiKeyPlaceholder( - metadata_["api_key_template"].asString(), api_key); - } - } - Json::Value response; response["status"] = "Model loaded successfully"; Json::Value status; @@ -697,7 +769,7 @@ void RemoteEngine::HandleEmbedding( Json::Value RemoteEngine::GetRemoteModels(const std::string& url, const std::string& api_key, - const std::string& api_key_template) { + const std::string& header_template) { if (url.empty()) { if (engine_name_ == kAnthropicEngine) { Json::Value json_resp; @@ -719,7 +791,7 @@ Json::Value RemoteEngine::GetRemoteModels(const std::string& url, return Json::Value(); } } else { - auto response = MakeGetModelsRequest(url, api_key, api_key_template); + auto response = MakeGetModelsRequest(url, api_key, header_template); if (response.error) { Json::Value error; error["error"] = response.error_message; diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 8b98c0a1a..bc6d534c5 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -51,7 +51,7 @@ class RemoteEngine : public RemoteEngineI { Json::Value metadata_; std::string chat_req_template_; std::string chat_res_template_; - std::string api_key_header_; + std::vector header_; std::string engine_name_; std::string chat_url_; trantor::ConcurrentTaskQueue q_; @@ -65,11 +65,11 @@ class RemoteEngine : public RemoteEngineI { const std::function& callback); CurlResponse MakeGetModelsRequest(const std::string& url, const std::string& api_key, - const std::string& api_key_template); + const std::string& header_template); // Internal model management bool LoadModelConfig(const std::string& model, const std::string& yaml_path, - const std::string& api_key); + const Json::Value& body); ModelConfig* GetModelConfig(const std::string& model); public: @@ -104,7 +104,7 @@ class RemoteEngine : public RemoteEngineI { Json::Value GetRemoteModels(const std::string& url, const std::string& api_key, - const std::string& api_key_template) override; + const std::string& header_template) override; }; } // namespace remote_engine \ No newline at end of file diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 9c0c2b544..1458596fc 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -1140,15 +1140,15 @@ cpp::result EngineService::GetRemoteModels( auto& e = std::get(engines_[engine_name].engine); auto url = remote_engine_json["metadata"]["get_models_url"].asString(); auto api_key = remote_engine_json["api_key"].asString(); - auto api_key_template = - remote_engine_json["metadata"]["api_key_template"].asString(); + auto header_template = + remote_engine_json["metadata"]["header_template"].asString(); if (url.empty()) CTL_WRN("url is empty"); if (api_key.empty()) CTL_WRN("api_key is empty"); - if (api_key_template.empty()) - CTL_WRN("api_key_template is empty"); - auto res = e->GetRemoteModels(url, api_key, api_key_template); + if (header_template.empty()) + CTL_WRN("header_template is empty"); + auto res = e->GetRemoteModels(url, api_key, header_template); if (!res["error"].isNull()) { return cpp::fail(res["error"].asString()); } else { diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 5790c4a90..3cefc3af6 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -917,6 +917,10 @@ cpp::result ModelService::StartModel( json_data = remote_mc.ToJson(); json_data["api_key"] = std::move(remote_engine_json["api_key"]); + if (auto v = remote_engine_json["version"].asString(); + !v.empty() && v != "latest") { + json_data["version"] = v; + } json_data["model_path"] = fmu::ToAbsoluteCortexDataPath( fs::path(model_entry.value().path_to_model_yaml)) From 64801c79372db63ac26e2018f063db74c99c5c10 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 7 Jan 2025 16:29:54 +0700 Subject: [PATCH 17/20] fix: use engine from model yaml --- engine/controllers/server.cc | 6 +++++- engine/services/inference_service.cc | 5 +++++ engine/services/inference_service.h | 2 ++ engine/services/model_service.cc | 19 +++++++++++++++++++ engine/services/model_service.h | 2 ++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 83eaddb4e..baebc3baa 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -44,6 +44,11 @@ void server::ChatCompletion( } }(); + if (auto efm = inference_svc_->GetEngineByModelId(model_id); !efm.empty()) { + engine_type = efm; + (*json_body)["engine"] = efm; + } + LOG_DEBUG << "request body: " << json_body->toStyledString(); auto q = std::make_shared(); auto ir = inference_svc_->HandleChatCompletion(q, json_body); @@ -203,7 +208,6 @@ void server::RouteRequest( ProcessNonStreamRes(callback, *q); LOG_TRACE << "Done route request"; } - } void server::LoadModel(const HttpRequestPtr& req, diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 3668fb6fe..057b6f716 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -394,3 +394,8 @@ bool InferenceService::HasFieldInReq(std::shared_ptr json_body, } return true; } + +std::string InferenceService::GetEngineByModelId( + const std::string& model_id) const { + return model_service_.lock()->GetEngineByModelId(model_id); +} diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index f23be3f23..794110f99 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -69,6 +69,8 @@ class InferenceService { model_service_ = model_service; } + std::string GetEngineByModelId(const std::string& model_id) const; + private: std::shared_ptr engine_service_; std::weak_ptr model_service_; diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 3cefc3af6..83a998e82 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -1394,3 +1394,22 @@ std::shared_ptr ModelService::GetCachedModelMetadata( return nullptr; return loaded_model_metadata_map_.at(model_id); } + +std::string ModelService::GetEngineByModelId( + const std::string& model_id) const { + namespace fs = std::filesystem; + namespace fmu = file_manager_utils; + auto model_entry = db_service_->GetModelInfo(model_id); + if (model_entry.has_error()) { + CTL_WRN("Error: " + model_entry.error()); + return ""; + } + config::YamlHandler yaml_handler; + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.value().path_to_model_yaml)) + .string()); + auto mc = yaml_handler.GetModelConfig(); + CTL_DBG(mc.engine); + return mc.engine; +} \ No newline at end of file diff --git a/engine/services/model_service.h b/engine/services/model_service.h index cc659fea5..a668b27ba 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -96,6 +96,8 @@ class ModelService { std::shared_ptr GetCachedModelMetadata( const std::string& model_id) const; + std::string GetEngineByModelId(const std::string& model_id) const; + private: /** * Handle downloading model which have following pattern: author/model_name From 6183572cd9f3dd41ba410b2150654aac0a8f39f2 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 7 Jan 2025 17:10:43 +0700 Subject: [PATCH 18/20] fix: better error handling in stream mode --- engine/controllers/server.cc | 9 +++++++-- engine/extensions/remote-engine/remote_engine.cc | 16 +++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index baebc3baa..a8cff2166 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -227,7 +227,7 @@ void server::ProcessStreamRes(std::function cb, auto err_or_done = std::make_shared(false); auto chunked_content_provider = [this, q, err_or_done, engine_type, model_id]( char* buf, - std::size_t buf_size) -> std::size_t { + std::size_t buf_size) -> std::size_t { if (buf == nullptr) { LOG_TRACE << "Buf is null"; if (!(*err_or_done)) { @@ -247,7 +247,12 @@ void server::ProcessStreamRes(std::function cb, *err_or_done = true; } - auto str = res["data"].asString(); + std::string str; + if (status["status_code"].asInt() != k200OK) { + str = json_helper::DumpJsonString(res); + } else { + str = res["data"].asString(); + } LOG_DEBUG << "data: " << str; std::size_t n = std::min(str.size(), buf_size); memcpy(buf, str.data(), n); diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 037815b19..34dacad9b 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -101,9 +101,20 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) { auto* context = static_cast(userdata); std::string chunk(ptr, size * nmemb); + CTL_DBG(chunk); + auto check_error = json_helper::ParseJsonString(chunk); + if (check_error.isMember("error")) { + CTL_WRN(chunk); + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = true; + status["status_code"] = k400BadRequest; + (*context->callback)(std::move(status), std::move(check_error)); + return size * nmemb; + } context->buffer += chunk; - CTL_DBG(chunk); // Process complete lines size_t pos; while ((pos = context->buffer.find('\n')) != std::string::npos) { @@ -389,8 +400,7 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, model_config.api_key = body["api_key"].asString(); // model_config.url = ; // Optional fields - if (auto s = config["header_template"]; - s && !s.as().empty()) { + if (auto s = config["header_template"]; s && !s.as().empty()) { header_ = ReplaceHeaderPlaceholder(s.as(), body); for (auto const& h : header_) { CTL_DBG("header: " << h); From 1cfcd602246fa4a9c8dc2c741e9c8a24856f5484 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 8 Jan 2025 10:36:00 +0700 Subject: [PATCH 19/20] chore: cleanup --- engine/config/model_config.h | 1 - engine/config/remote_template.h | 107 ------------------ engine/extensions/remote-engine/helper.h | 80 +++++++++++++ .../extensions/remote-engine/remote_engine.cc | 85 +------------- engine/test/components/test_remote_engine.cc | 66 +++++++++++ 5 files changed, 151 insertions(+), 188 deletions(-) delete mode 100644 engine/config/remote_template.h create mode 100644 engine/extensions/remote-engine/helper.h diff --git a/engine/config/model_config.h b/engine/config/model_config.h index ce84ad63c..539860d9d 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -11,7 +11,6 @@ #include #include #include -#include "config/remote_template.h" #include "utils/format_utils.h" #include "utils/remote_models_utils.h" diff --git a/engine/config/remote_template.h b/engine/config/remote_template.h deleted file mode 100644 index 8f6cb50c2..000000000 --- a/engine/config/remote_template.h +++ /dev/null @@ -1,107 +0,0 @@ -#include - -namespace config { -const std::string kOpenAITransformReqTemplate = - R"({ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} })"; -const std::string kOpenAITransformRespTemplate = - R"({ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == "id" or key == "choices" or key == "created" or key == "model" or key == "service_tier" or key == "system_fingerprint" or key == "object" or key == "usage" -%} {%- if not first -%},{%- endif -%} "{{ key }}": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} })"; -const std::string kAnthropicTransformReqTemplate = - R"({ - {% for key, value in input_request %} - {% if key == "messages" %} - {% if input_request.messages.0.role == "system" %} - "system": "{{ input_request.messages.0.content }}", - "messages": [ - {% for message in input_request.messages %} - {% if not loop.is_first %} - {"role": "{{ message.role }}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} - {% endif %} - {% endfor %} - ] - {% else %} - "messages": [ - {% for message in input_request.messages %} - {"role": " {{ message.role}}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} - {% endfor %} - ] - {% endif %} - {% else if key == "system" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} - "{{ key }}": {{ tojson(value) }} - {% endif %} - {% if not loop.is_last %},{% endif %} - {% endfor %} })"; -const std::string kAnthropicTransformRespTemplate = R"({ - "id": "{{ input_request.id }}", - "created": null, - "object": "chat.completion", - "model": "{{ input_request.model }}", - "choices": [ - { - "index": 0, - "message": { - "role": "{{ input_request.role }}", - "content": "{% if input_request.content and input_request.content.0.type == "text" %} {{input_request.content.0.text}} {% endif %}", - "refusal": null - }, - "logprobs": null, - "finish_reason": "{{ input_request.stop_reason }}" - } - ], - "usage": { - "prompt_tokens": {{ input_request.usage.input_tokens }}, - "completion_tokens": {{ input_request.usage.output_tokens }}, - "total_tokens": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, - "prompt_tokens_details": { - "cached_tokens": 0 - }, - "completion_tokens_details": { - "reasoning_tokens": 0, - "accepted_prediction_tokens": 0, - "rejected_prediction_tokens": 0 - } - }, - "system_fingerprint": "fp_6b68a8204b" - })"; - -const std::string kDefaultRemoteModelConfig = R"( -{ - "model": "o1-preview", - "header_template": "Authorization: Bearer {{api_key}}", - "engine": "openai", - "version": "1", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 4096, - "stream": true - }, - "transform_req": { - "get_models": { - "url": "https://api.openai.com/v1/models" - }, - "chat_completions": { - "url": "https://api.openai.com/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - }, - "embeddings": { - "url": "https://api.openai.com/v1/embeddings", - "template": "{\"input\": {{tojson(input)}}, \"model\": \"text-embedding-ada-002\"}" - } - }, - "transform_resp": { - "chat_completions": { - "template":"{ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == \"id\" or key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"object\" or key == \"usage\" -%} {%- if not first -%},{%- endif -%} \"{{ key }}\": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} }" - }, - "embeddings": {} - }, - "metadata": { - "author": "OpenAI", - "description": "GPT-4 is a large language model by OpenAI", - "end_point": "https://api.openai.com/v1/chat/completions", - "logo": "https://i.pinimg.com/564x/08/ea/94/08ea94ca94a4b3a04037bdfc335ae00d.jpg", - "api_key_url": "https://platform.openai.com/api-keys" - } -})"; -} // namespace config \ No newline at end of file diff --git a/engine/extensions/remote-engine/helper.h b/engine/extensions/remote-engine/helper.h new file mode 100644 index 000000000..5a99e5f33 --- /dev/null +++ b/engine/extensions/remote-engine/helper.h @@ -0,0 +1,80 @@ +#pragma once +#include +#include +#include +#include + +namespace remote_engine { +std::vector GetReplacements(const std::string& header_template) { + std::vector replacements; + std::regex placeholder_regex(R"(\{\{(.*?)\}\})"); + std::smatch match; + + std::string template_copy = header_template; + while (std::regex_search(template_copy, match, placeholder_regex)) { + std::string key = match[1].str(); + replacements.push_back(key); + template_copy = match.suffix().str(); + } + + return replacements; +} + +std::vector ReplaceHeaderPlaceholders( + const std::string& header_template, + const std::unordered_map& replacements) { + std::vector result; + size_t start = 0; + size_t end = header_template.find("}}"); + + while (end != std::string::npos) { + // Extract the part + std::string part = header_template.substr(start, end - start + 2); + + // Replace variables in this part + for (const auto& var : replacements) { + std::string placeholder = "{{" + var.first + "}}"; + size_t pos = part.find(placeholder); + if (pos != std::string::npos) { + part.replace(pos, placeholder.length(), var.second); + } + } + + // Trim whitespace + part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); + part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); + + // Add to result if not empty + if (!part.empty()) { + result.push_back(part); + } + + // Move to next part + start = end + 2; + end = header_template.find("}}", start); + } + + // Process any remaining part + if (start < header_template.length()) { + std::string part = header_template.substr(start); + + // Replace variables in this part + for (const auto& var : replacements) { + std::string placeholder = "{{" + var.first + "}}"; + size_t pos = part.find(placeholder); + if (pos != std::string::npos) { + part.replace(pos, placeholder.length(), var.second); + } + } + + // Trim whitespace + part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); + part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); + + if (!part.empty()) { + result.push_back(part); + } + } + return result; +} +} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 34dacad9b..0d7ecbef1 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -4,6 +4,7 @@ #include #include #include +#include "helper.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" namespace remote_engine { @@ -13,82 +14,6 @@ constexpr const int k400BadRequest = 400; constexpr const int k409Conflict = 409; constexpr const int k500InternalServerError = 500; constexpr const int kFileLoggerOption = 0; -bool is_anthropic(const std::string& model) { - return model.find("claude") != std::string::npos; -} - -std::vector GetReplacements(const std::string& header_template) { - std::vector replacements; - std::regex placeholder_regex(R"(\{\{(.*?)\}\})"); - std::smatch match; - - std::string template_copy = header_template; - while (std::regex_search(template_copy, match, placeholder_regex)) { - std::string key = match[1].str(); - replacements.push_back(key); - template_copy = match.suffix().str(); - } - - return replacements; -} - -std::vector ReplaceHeaderPlaceholder( - const std::string& header_template, - const std::unordered_map& replacements) { - std::vector result; - size_t start = 0; - size_t end = header_template.find("}}"); - - while (end != std::string::npos) { - // Extract the part - std::string part = header_template.substr(start, end - start + 2); - - // Replace variables in this part - for (const auto& var : replacements) { - std::string placeholder = "{{" + var.first + "}}"; - size_t pos = part.find(placeholder); - if (pos != std::string::npos) { - part.replace(pos, placeholder.length(), var.second); - } - } - - // Trim whitespace - part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); - part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); - - // Add to result if not empty - if (!part.empty()) { - result.push_back(part); - } - - // Move to next part - start = end + 2; - end = header_template.find("}}", start); - } - - // Process any remaining part - if (start < header_template.length()) { - std::string part = header_template.substr(start); - - // Replace variables in this part - for (const auto& var : replacements) { - std::string placeholder = "{{" + var.first + "}}"; - size_t pos = part.find(placeholder); - if (pos != std::string::npos) { - part.replace(pos, placeholder.length(), var.second); - } - } - - // Trim whitespace - part.erase(0, part.find_first_not_of(" \t\n\r\f\v")); - part.erase(part.find_last_not_of(" \t\n\r\f\v") + 1); - - if (!part.empty()) { - result.push_back(part); - } - } - return result; -} constexpr const std::array kAnthropicModels = { "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", @@ -260,7 +185,7 @@ std::string ReplaceApiKeyPlaceholder(const std::string& templateStr, return result; } -std::vector ReplaceHeaderPlaceholder( +std::vector ReplaceHeaderPlaceholders( const std::string& template_str, Json::Value json_body) { CTL_DBG(template_str); auto keys = GetReplacements(template_str); @@ -272,7 +197,7 @@ std::vector ReplaceHeaderPlaceholder( replacements.insert({k, json_body[k].asString()}); } } - return ReplaceHeaderPlaceholder(template_str, replacements); + return ReplaceHeaderPlaceholders(template_str, replacements); } static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, @@ -401,7 +326,7 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, // model_config.url = ; // Optional fields if (auto s = config["header_template"]; s && !s.as().empty()) { - header_ = ReplaceHeaderPlaceholder(s.as(), body); + header_ = ReplaceHeaderPlaceholders(s.as(), body); for (auto const& h : header_) { CTL_DBG("header: " << h); } @@ -516,7 +441,7 @@ void RemoteEngine::LoadModel( if (json_body->isMember("metadata")) { if (!metadata_["header_template"].isNull()) { - header_ = ReplaceHeaderPlaceholder( + header_ = ReplaceHeaderPlaceholders( metadata_["header_template"].asString(), *json_body); for (auto const& h : header_) { CTL_DBG("header: " << h); diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc index 0d0529f87..3249e25d5 100644 --- a/engine/test/components/test_remote_engine.cc +++ b/engine/test/components/test_remote_engine.cc @@ -1,3 +1,7 @@ +#include +#include +#include +#include "extensions/remote-engine/helper.h" #include "extensions/template_renderer.h" #include "gtest/gtest.h" #include "utils/json_helper.h" @@ -122,4 +126,66 @@ TEST_F(RemoteEngineTest, OpenAiResponse) { EXPECT_EQ(data["created"].asInt(), res_json["created"].asInt()); EXPECT_EQ(data["choices"][0]["delta"]["content"].asString(), res_json["choices"][0]["delta"]["content"].asString()); +} + +TEST_F(RemoteEngineTest, HeaderTemplate) { + { + std::string header_template = + R"(x-api-key: {{api_key}} anthropic-version: {{version}})"; + Json::Value test_value; + test_value["api_key"] = "test"; + test_value["version"] = "test_version"; + std::unordered_map replacements; + auto r = remote_engine::GetReplacements(header_template); + for (auto s : r) { + if (test_value.isMember(s)) { + replacements.insert({s, test_value[s].asString()}); + } + } + + auto result = + remote_engine::ReplaceHeaderPlaceholders(header_template, replacements); + + EXPECT_EQ(result[0], "x-api-key: test"); + EXPECT_EQ(result[1], "anthropic-version: test_version"); + } + + { + std::string header_template = + R"(x-api-key: {{api_key}} anthropic-version: test_version)"; + Json::Value test_value; + test_value["api_key"] = "test"; + test_value["version"] = "test_version"; + std::unordered_map replacements; + auto r = remote_engine::GetReplacements(header_template); + for (auto s : r) { + if (test_value.isMember(s)) { + replacements.insert({s, test_value[s].asString()}); + } + } + + auto result = + remote_engine::ReplaceHeaderPlaceholders(header_template, replacements); + + EXPECT_EQ(result[0], "x-api-key: test"); + EXPECT_EQ(result[1], "anthropic-version: test_version"); + } + + { + std::string header_template = R"(Authorization: Bearer {{api_key}}")"; + Json::Value test_value; + test_value["api_key"] = "test"; + std::unordered_map replacements; + auto r = remote_engine::GetReplacements(header_template); + for (auto s : r) { + if (test_value.isMember(s)) { + replacements.insert({s, test_value[s].asString()}); + } + } + + auto result = + remote_engine::ReplaceHeaderPlaceholders(header_template, replacements); + + EXPECT_EQ(result[0], "Authorization: Bearer test"); + } } \ No newline at end of file From 3ade4e45e8198b90a63c0544760bea32db32c7de Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 8 Jan 2025 13:56:17 +0700 Subject: [PATCH 20/20] chore: unit test for anthropic response --- engine/test/components/test_remote_engine.cc | 53 ++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc index 3249e25d5..3bb3cdca3 100644 --- a/engine/test/components/test_remote_engine.cc +++ b/engine/test/components/test_remote_engine.cc @@ -128,6 +128,59 @@ TEST_F(RemoteEngineTest, OpenAiResponse) { res_json["choices"][0]["delta"]["content"].asString()); } +TEST_F(RemoteEngineTest, AnthropicResponse) { + std::string tpl = R"( + {% if input_request.stream %} + {"object": "chat.completion.chunk", "model": "{{ input_request.model }}", "choices": [{"index": 0, "delta": { {% if input_request.type == "message_start" %} "role": "assistant", "content": null {% else if input_request.type == "ping" %} "role": "assistant", "content": null {% else if input_request.type == "content_block_delta" %} "role": "assistant", "content": "{{ input_request.delta.text }}" {% else if input_request.type == "content_block_stop" %} "role": "assistant", "content": null {% else if input_request.type == "content_block_stop" %} "role": "assistant", "content": null {% endif %} }, {% if input_request.type == "content_block_stop" %} "finish_reason": "stop" {% else %} "finish_reason": null {% endif %} }]} + {% else %} + {"id": "{{ input_request.id }}", + "created": null, + "object": "chat.completion", + "model": "{{ input_request.model }}", + "choices": [{ + "index": 0, + "message": { + "role": "{{ input_request.role }}", + "content": {% if not input_request.content %} null {% else if input_request.content and input_request.content.0.type == "text" %} {{input_request.content.0.text}} {% else %} null {% endif %}, + "refusal": null }, + "logprobs": null, + "finish_reason": "{{ input_request.stop_reason }}" } ], + "usage": { + "prompt_tokens": {{ input_request.usage.input_tokens }}, + "completion_tokens": {{ input_request.usage.output_tokens }}, + "total_tokens": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, + "prompt_tokens_details": { "cached_tokens": 0 }, + "completion_tokens_details": { "reasoning_tokens": 0, "accepted_prediction_tokens": 0, "rejected_prediction_tokens": 0 } }, + "system_fingerprint": "fp_6b68a8204b"} + {% endif %})"; + std::string message = R"( + { + "content": [], + "id": "msg_01SckpnDyChcmmawQsWHr8CH", + "model": "claude-3-opus-20240229", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": null, + "stream": false, + "type": "message", + "usage": { + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "input_tokens": 130, + "output_tokens": 3 + } + })"; + auto data = json_helper::ParseJsonString(message); + + extensions::TemplateRenderer rdr; + auto res = rdr.Render(tpl, data); + + auto res_json = json_helper::ParseJsonString(res); + EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); + EXPECT_EQ(data["created"].asInt(), res_json["created"].asInt()); + EXPECT_TRUE(res_json["choices"][0]["message"]["content"].isNull()); +} + TEST_F(RemoteEngineTest, HeaderTemplate) { { std::string header_template =