4 changes: 4 additions & 0 deletions engine/config/model_config.h
@@ -135,6 +135,7 @@ struct ModelConfig {
bool text_model = std::numeric_limits<bool>::quiet_NaN();
std::string id;
std::vector<std::string> files;
+ std::string mmproj;
std::size_t created;
std::string object;
std::string owned_by = "";
@@ -338,6 +339,9 @@ struct ModelConfig {
files_array.append(file);
}
obj["files"] = files_array;
+ if (!mmproj.empty()) {
+   obj["mmproj"] = mmproj;
+ }

obj["created"] = static_cast<Json::UInt64>(created);
obj["object"] = object;
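The new field is emitted only when set, so the serialized JSON of text-only models is unchanged. As a minimal standalone sketch of the same optional-key pattern with jsoncpp (MiniConfig is a hypothetical stand-in, not the real struct):

#include <json/json.h>
#include <iostream>
#include <string>

struct MiniConfig {
  std::string mmproj;  // empty means "no multimodal projector"
  Json::Value ToJson() const {
    Json::Value obj(Json::objectValue);
    if (!mmproj.empty()) {  // emit the key only when a projector is set
      obj["mmproj"] = mmproj;
    }
    return obj;
  }
};

int main() {
  MiniConfig text_only;                     // key omitted
  MiniConfig vision{"models/mmproj.gguf"};  // key present
  std::cout << text_only.ToJson() << vision.ToJson() << "\n";
}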
63 changes: 43 additions & 20 deletions engine/config/yaml_config.cc
@@ -21,11 +21,13 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {

try {
yaml_node_ = YAML::LoadFile(file_path);
+ auto normalize_path = [](std::string p) {
+   std::replace(p.begin(), p.end(), '\\', '/');
+   return p;
+ };
// In case of a model.yml file we don't have files yet; create them
if (!yaml_node_["files"]) {
- auto s = file_path;
- // normalize path
- std::replace(s.begin(), s.end(), '\\', '/');
+ auto s = normalize_path(file_path);
std::vector<std::string> v;
if (yaml_node_["engine"] &&
(yaml_node_["engine"].as<std::string>() == kLlamaRepo ||
@@ -41,6 +43,18 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
// TODO(any) need to support multiple gguf files
yaml_node_["files"] = v;
}

+ // add mmproj file to the yml if it exists
+ if (!yaml_node_["mmproj"]) {
+   auto s = normalize_path(file_path);
+   auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
+   CTL_DBG("mmproj: " << abs_path);
+   auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
+   if (std::filesystem::exists(abs_path)) {
+     yaml_node_["mmproj"] = rel_path.string();
+   }
+ }

} catch (const YAML::BadFile& e) {
throw;
}
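The detection rule: a file named mmproj.gguf sitting next to the model's model.yml is recorded as that model's projector. A standalone sketch of the rule under that assumption, using std::filesystem directly in place of the repo's fmu::ToRelativeCortexDataPath helper (FindSiblingMmproj is a hypothetical name):

#include <algorithm>
#include <filesystem>
#include <iostream>
#include <optional>
#include <string>

namespace fs = std::filesystem;

// Return the path of an mmproj.gguf living next to the given model.yml, if any.
std::optional<fs::path> FindSiblingMmproj(std::string yml_path) {
  std::replace(yml_path.begin(), yml_path.end(), '\\', '/');  // normalize separators
  fs::path candidate = fs::path(yml_path).parent_path() / "mmproj.gguf";
  if (fs::exists(candidate)) return candidate;
  return std::nullopt;
}

int main() {
  if (auto mmproj = FindSiblingMmproj("models/llava/model.yml")) {
    std::cout << "projector: " << *mmproj << "\n";
  }
}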
@@ -131,6 +145,8 @@ void YamlHandler::ModelConfigFromYaml() {
tmp.stop = yaml_node_["stop"].as<std::vector<std::string>>();
if (yaml_node_["files"])
tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
if (yaml_node_["mmproj"])
tmp.mmproj = yaml_node_["mmproj"].as<std::string>();
if (yaml_node_["created"])
tmp.created = yaml_node_["created"].as<std::size_t>();
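Like the surrounding fields, the new key is read defensively: an absent yaml-cpp node is falsy, and calling .as<T>() on it would throw, so the member keeps its default when the key is missing. A self-contained sketch of that pattern:

#include <yaml-cpp/yaml.h>
#include <iostream>
#include <string>

int main() {
  YAML::Node node = YAML::Load("mmproj: models/llava/mmproj.gguf");
  std::string mmproj;  // stays empty when the key is absent
  if (node["mmproj"])
    mmproj = node["mmproj"].as<std::string>();
  std::cout << "mmproj: " << mmproj << "\n";
}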

@@ -239,6 +255,9 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
if (model_config_.files.size() > 0)
yaml_node_["files"] = model_config_.files;

+ if (!model_config_.mmproj.empty())
+   yaml_node_["mmproj"] = model_config_.mmproj;

if (!std::isnan(static_cast<double>(model_config_.seed)))
yaml_node_["seed"] = model_config_.seed;
if (!std::isnan(model_config_.dynatemp_range))
@@ -301,17 +320,21 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
"Model ID which is used for request construct - should be "
"unique between models (author / quantization)");
out_file << format_utils::WriteKeyValue("name", yaml_node_["name"],
"metadata.general.name");
"metadata.general.name");
if (yaml_node_["version"]) {
out_file << "version: " << yaml_node_["version"].as<std::string>() << "\n";
out_file << "version: " << yaml_node_["version"].as<std::string>()
<< "\n";
}
if (yaml_node_["files"] && yaml_node_["files"].size()) {
out_file << "files: # Can be relative OR absolute local file "
"path\n";
"path\n";
for (const auto& source : yaml_node_["files"]) {
out_file << " - " << source << "\n";
}
}
if (yaml_node_["mmproj"]) {
out_file << "mmproj: " << yaml_node_["mmproj"].as<std::string>() << "\n";
}

out_file << "# END GENERAL GGUF METADATA\n";
out_file << "\n";
@@ -330,9 +353,9 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
out_file << "# BEGIN OPTIONAL\n";
out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]);
out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"],
"Default true?");
"Default true?");
out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"],
"Ranges: 0 to 1");
"Ranges: 0 to 1");
out_file << format_utils::WriteKeyValue(
"temperature", yaml_node_["temperature"], "Ranges: 0 to 1");
out_file << format_utils::WriteKeyValue(
@@ -344,26 +367,26 @@
"Should be default to context length");
out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]);
out_file << format_utils::WriteKeyValue("dynatemp_range",
yaml_node_["dynatemp_range"]);
yaml_node_["dynatemp_range"]);
out_file << format_utils::WriteKeyValue("dynatemp_exponent",
yaml_node_["dynatemp_exponent"]);
yaml_node_["dynatemp_exponent"]);
out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]);
out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]);
out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]);
out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]);
out_file << format_utils::WriteKeyValue("repeat_last_n",
yaml_node_["repeat_last_n"]);
yaml_node_["repeat_last_n"]);
out_file << format_utils::WriteKeyValue("repeat_penalty",
yaml_node_["repeat_penalty"]);
yaml_node_["repeat_penalty"]);
out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]);
out_file << format_utils::WriteKeyValue("mirostat_tau",
yaml_node_["mirostat_tau"]);
yaml_node_["mirostat_tau"]);
out_file << format_utils::WriteKeyValue("mirostat_eta",
yaml_node_["mirostat_eta"]);
yaml_node_["mirostat_eta"]);
out_file << format_utils::WriteKeyValue("penalize_nl",
yaml_node_["penalize_nl"]);
yaml_node_["penalize_nl"]);
out_file << format_utils::WriteKeyValue("ignore_eos",
yaml_node_["ignore_eos"]);
yaml_node_["ignore_eos"]);
out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]);
out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]);
out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]);
@@ -374,7 +397,7 @@
out_file << "# BEGIN MODEL LOAD PARAMETERS\n";
out_file << "# BEGIN REQUIRED\n";
out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"],
"engine to run model");
"engine to run model");
out_file << "prompt_template:";
out_file << " " << yaml_node_["prompt_template"] << "\n";
out_file << "# END REQUIRED\n";
@@ -384,11 +407,11 @@
"ctx_len", yaml_node_["ctx_len"],
"llama.context_length | 0 or undefined = loaded from model");
out_file << format_utils::WriteKeyValue("n_parallel",
yaml_node_["n_parallel"]);
yaml_node_["n_parallel"]);
out_file << format_utils::WriteKeyValue("cpu_threads",
yaml_node_["cpu_threads"]);
yaml_node_["cpu_threads"]);
out_file << format_utils::WriteKeyValue("ngl", yaml_node_["ngl"],
"Undefined = loaded from model");
"Undefined = loaded from model");
out_file << "# END OPTIONAL\n";
out_file << "# END MODEL LOAD PARAMETERS\n";

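Aside from the mmproj block, the churn in WriteYamlFile appears to be indentation-only realignment of continuation lines. For readers outside the repo, the format_utils::WriteKeyValue helper used throughout can be imagined roughly as follows (a hypothetical reconstruction; the real signature lives in the repo's format_utils):

#include <yaml-cpp/yaml.h>
#include <sstream>
#include <string>

// Emit "key: value" with an optional trailing comment, skipping absent keys.
std::string WriteKeyValue(const std::string& key, const YAML::Node& value,
                          const std::string& comment = "") {
  if (!value) return "";
  std::ostringstream oss;
  oss << key << ": " << value;
  if (!comment.empty()) oss << " # " << comment;
  oss << "\n";
  return oss.str();
}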
4 changes: 2 additions & 2 deletions engine/controllers/models.cc
@@ -533,8 +533,8 @@ void Models::StartModel(
auto model_handle = (*(req->getJsonObject())).get("model", "").asString();

std::optional<std::string> mmproj;
- if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) {
-   mmproj = o.asString();
+ if (auto& o = (*(req->getJsonObject())); o.isMember("mmproj")) {
+   mmproj = o["mmproj"].asString();
}

auto bypass_llama_model_path = false;
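The old probe was subtly destructive: on a non-const Json::Value, operator[] default-constructs a null member when the key is missing, so even checking isNull() this way mutates the request body. isMember() is a pure lookup. A standalone demonstration of the jsoncpp behavior:

#include <json/json.h>
#include <cassert>

int main() {
  Json::Value body(Json::objectValue);
  assert(!body.isMember("mmproj"));
  bool was_null = body["mmproj"].isNull();  // side effect: inserts "mmproj": null
  assert(was_null);
  assert(body.isMember("mmproj"));          // the key now exists in the body
  return 0;
}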
2 changes: 1 addition & 1 deletion engine/services/hardware_service.cc
@@ -304,7 +304,7 @@ void HardwareService::UpdateHardwareInfos() {
};
for (auto const& he : b.value()) {
if (!exists(he.uuid)) {
- db_service_->DeleteHardwareEntry(he.uuid);
+ (void)db_service_->DeleteHardwareEntry(he.uuid);
}
}

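The (void) cast changes no behavior; it records that the caller is deliberately ignoring the result, which silences unused-result warnings if DeleteHardwareEntry's return type is marked [[nodiscard]] (presumably the motivation here). A minimal sketch of the pattern with a stand-in function:

#include <cstdio>

[[nodiscard]] bool DeleteEntry(const char* uuid) {  // stand-in, not the real API
  std::printf("deleting %s\n", uuid);
  return true;
}

int main() {
  (void)DeleteEntry("gpu-0");  // explicit, intentional discard of the result
}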
33 changes: 22 additions & 11 deletions engine/services/model_service.cc
@@ -155,8 +155,8 @@ ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
inference_svc_(inference_service),
engine_svc_(engine_svc),
task_queue_(task_queue) {
// ProcessBgrTasks();
};

void ModelService::ForceIndexingModelList() {
CTL_INF("Force indexing model list");
@@ -947,6 +947,15 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
LOG_WARN << "model_path is empty";
return StartModelResult{.success = false};
}
+ if (!mc.mmproj.empty()) {
+ #if defined(_WIN32)
+   json_data["mmproj"] = cortex::wc::WstringToUtf8(
+       fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).wstring());
+ #else
+   json_data["mmproj"] =
+       fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
+ #endif
+ }
json_data["system_prompt"] = mc.system_template;
json_data["user_prompt"] = mc.user_template;
json_data["ai_prompt"] = mc.ai_template;
@@ -996,16 +1005,18 @@
auto data = std::get<1>(ir);

if (status == drogon::k200OK) {
- // start model successfully, we store the metadata so we can use
- // for each inference
+ // model started successfully; unless it is a vision model, we store the
+ // metadata so we can reuse it for each inference
- auto metadata_res = GetModelMetadata(model_handle);
- if (metadata_res.has_value()) {
-   loaded_model_metadata_map_.emplace(model_handle,
-                                      std::move(metadata_res.value()));
-   CTL_INF("Successfully stored metadata for model " << model_handle);
- } else {
-   CTL_WRN("Failed to get metadata for model " << model_handle << ": "
-           << metadata_res.error());
- }
+ if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
+   auto metadata_res = GetModelMetadata(model_handle);
+   if (metadata_res.has_value()) {
+     loaded_model_metadata_map_.emplace(model_handle,
+                                        std::move(metadata_res.value()));
+     CTL_INF("Successfully stored metadata for model " << model_handle);
+   } else {
+     CTL_WRN("Failed to get metadata for model " << model_handle << ": "
+             << metadata_res.error());
+   }
+ }

return StartModelResult{.success = true,