This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Changes from all commits (20 commits)
222e819 Init model.list utils (nguyenhoangthuan99, Sep 18, 2024)
3a4aa20 Add cmakelist compile (nguyenhoangthuan99, Sep 18, 2024)
78dce12 Add cmakelist compile (nguyenhoangthuan99, Sep 18, 2024)
96bbb2f Fix CI build windows (nguyenhoangthuan99, Sep 18, 2024)
d13e8d8 add unitest (nguyenhoangthuan99, Sep 18, 2024)
41cfddd Merge branch 'dev' into feat/model.list-utils (nguyenhoangthuan99, Sep 18, 2024)
b3a258c Add test (nguyenhoangthuan99, Sep 18, 2024)
d2eb1cb Merge branch 'feat/model.list-utils' of github.com:janhq/cortex into … (nguyenhoangthuan99, Sep 18, 2024)
6f6bb91 Merge branch 'dev' of github.com:janhq/cortex into feat/gguf-yaml-parser (nguyenhoangthuan99, Sep 18, 2024)
654a979 Update yaml and gguf parser (nguyenhoangthuan99, Sep 18, 2024)
6b107a9 Fix update wrong params (nguyenhoangthuan99, Sep 18, 2024)
e92ed40 Merge branch 'dev' of github.com:janhq/cortex into feat/gguf-yaml-parser (nguyenhoangthuan99, Sep 18, 2024)
ac4282a add unitests (nguyenhoangthuan99, Sep 18, 2024)
2e627e1 Merge branch 'dev' into feat/gguf-yaml-parser (nguyenhoangthuan99, Sep 18, 2024)
a162636 add unitest for gguf (nguyenhoangthuan99, Sep 18, 2024)
098423b Merge branch 'feat/gguf-yaml-parser' of github.com:janhq/cortex into … (nguyenhoangthuan99, Sep 18, 2024)
d919516 add unitest for gguf (nguyenhoangthuan99, Sep 18, 2024)
22e4408 Merge branch 'dev' of github.com:janhq/cortex into feat/gguf-yaml-parser (nguyenhoangthuan99, Sep 19, 2024)
8a2fc36 Fix comment (nguyenhoangthuan99, Sep 19, 2024)
7fa1e61 Fix build test fail (nguyenhoangthuan99, Sep 19, 2024)
37 changes: 31 additions & 6 deletions engine/config/gguf_parser.cc
@@ -409,7 +409,23 @@ void GGUFHandler::ModelConfigFromMetadata() {
model_config_.created = std::time(nullptr);
model_config_.model = "model";
model_config_.owned_by = "";
model_config_.version;
model_config_.seed = -1;
model_config_.dynatemp_range = 0.0f;
model_config_.dynatemp_exponent = 1.0f;
model_config_.top_k = 40;
model_config_.min_p = 0.05f;
model_config_.tfs_z = 1.0f;
model_config_.typ_p = 1.0f;
model_config_.repeat_last_n = 64;
model_config_.repeat_penalty = 1.0f;
model_config_.mirostat = false;
model_config_.mirostat_tau = 5.0f;
model_config_.mirostat_eta = 0.1f;
model_config_.penalize_nl = false;
model_config_.ignore_eos = false;
model_config_.n_probs = 0;
model_config_.min_keep = 0;
model_config_.grammar = "";

// Get version, bos, eos id, context_len, ngl from metadata
for (const auto& [key, value] : metadata_uint8_) {
@@ -522,7 +538,7 @@ void GGUFHandler::ModelConfigFromMetadata() {
for (const auto& [key, value] : metadata_string_) {
if (key.compare("general.name") == 0) {
name = std::regex_replace(value, std::regex(" "), "-");
} else if (key.compare("tokenizer.chat_template") == 0) {
} else if (key.find("chat_template") != std::string::npos) {
if (value.compare(ZEPHYR_JINJA) == 0) {
chat_template =
"<|system|>\n{system_message}</s>\n<|user|>\n{prompt}</"
@@ -564,12 +580,21 @@ void GGUFHandler::ModelConfigFromMetadata() {
}
}

eos_string = tokens[eos_token];
bos_string = tokens[bos_token];
stop.push_back(std::move(eos_string));
try {
if (tokens.size() > eos_token) {
eos_string = tokens[eos_token];
stop.push_back(std::move(eos_string));
} else {
LOG_ERROR << "Can't find stop token";
}
} catch (const std::exception& e) {
LOG_ERROR << "Can't find stop token";
}

model_config_.stop = std::move(stop);

if (chat_template.empty())
chat_template =
"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt}[/INST]";
model_config_.prompt_template = std::move(chat_template);
model_config_.name = name;
model_config_.model = name;
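A note on the guarded stop-token lookup above: `std::vector::operator[]` performs no bounds checking, and an out-of-range subscript is undefined behavior rather than an exception, so the `tokens.size() > eos_token` comparison is what actually protects the access; the `catch` branch will not fire for a bad index. A minimal self-contained sketch (illustrative, not this PR's code) of a guard that genuinely relies on an exception, using `std::vector::at`:

```cpp
// Sketch: at() throws std::out_of_range, unlike operator[].
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> tokens = {"<s>", "</s>"};
  std::size_t eos_token = 7;  // deliberately out of range
  try {
    std::string eos_string = tokens.at(eos_token);  // throws here
    std::cout << "eos: " << eos_string << "\n";
  } catch (const std::out_of_range&) {
    std::cerr << "Can't find stop token\n";
  }
  return 0;
}
```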
18 changes: 18 additions & 0 deletions engine/config/model_config.h
@@ -36,5 +36,23 @@ struct ModelConfig {
std::size_t created;
std::string object;
std::string owned_by = "";

int seed = -1;
float dynatemp_range = 0.0f;
float dynatemp_exponent = 1.0f;
int top_k = 40;
float min_p = 0.05f;
float tfs_z = 1.0f;
float typ_p = 1.0f;
int repeat_last_n = 64;
float repeat_penalty = 1.0f;
bool mirostat = false;
float mirostat_tau = 5.0f;
float mirostat_eta = 0.1f;
bool penalize_nl = false;
bool ignore_eos = false;
int n_probs = 0;
int min_keep = 0;
std::string grammar;
};
} // namespace config
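The defaults on these new fields appear to track llama.cpp's usual sampling defaults (`top_k` 40, `min_p` 0.05, `tfs_z` and `typ_p` at 1.0 meaning disabled, `repeat_last_n` 64, mirostat off). Because they are in-class initializers, a default-constructed config already carries them; a trimmed, self-contained stand-in (the real struct lives in `engine/config/model_config.h`):

```cpp
#include <iostream>

// Trimmed local stand-in for config::ModelConfig as extended above;
// only a few of the new sampling fields are reproduced here.
struct ModelConfig {
  int seed = -1;                // -1: let the engine pick a random seed
  int top_k = 40;
  float min_p = 0.05f;
  float repeat_penalty = 1.0f;  // 1.0 disables the penalty
  bool mirostat = false;
};

int main() {
  ModelConfig mc;  // in-class initializers apply without any constructor
  std::cout << "seed=" << mc.seed << " top_k=" << mc.top_k
            << " min_p=" << mc.min_p << "\n";
  return 0;
}
```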
165 changes: 163 additions & 2 deletions engine/config/yaml_config.cc
@@ -24,6 +24,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
std::vector<std::string> v;
if (yaml_node_["engine"] &&
yaml_node_["engine"].as<std::string>() == "cortex.llamacpp") {
// TODO: change prefix to models:// with source from cortexso
v.emplace_back(s.substr(0, s.find_last_of('/')) + "/model.gguf");
} else {
v.emplace_back(s.substr(0, s.find_last_of('/')));
@@ -36,7 +37,6 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
std::cerr << "Failed to read file: " << e.what() << std::endl;
throw;
}
ModelConfigFromYaml();
}
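This hunk also drops the trailing `ModelConfigFromYaml();` call from `ReadYamlFile` (the single deletion in this function), so parsing the file and materializing the `ModelConfig` become separate steps. A hypothetical caller under that reading; the header path and method visibility are assumptions, not shown in the diff:

```cpp
#include "config/yaml_config.h"  // assumed header location in the engine/ tree

int main() {
  config::YamlHandler handler;
  handler.ReadYamlFile("model.yml");
  handler.ModelConfigFromYaml();  // now an explicit caller responsibility (assumption)
  return 0;
}
```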
void YamlHandler::SplitPromptTemplate(ModelConfig& mc) {
if (mc.prompt_template.size() > 0) {
@@ -119,6 +119,41 @@ void YamlHandler::ModelConfigFromYaml() {
tmp.files = yaml_node_["files"].as<std::vector<std::string>>();
if (yaml_node_["created"])
tmp.created = yaml_node_["created"].as<std::size_t>();

if (yaml_node_["seed"])
tmp.seed = yaml_node_["seed"].as<int>();
if (yaml_node_["dynatemp_range"])
tmp.dynatemp_range = yaml_node_["dynatemp_range"].as<float>();
if (yaml_node_["dynatemp_exponent"])
tmp.dynatemp_exponent = yaml_node_["dynatemp_exponent"].as<float>();
if (yaml_node_["top_k"])
tmp.top_k = yaml_node_["top_k"].as<int>();
if (yaml_node_["min_p"])
tmp.min_p = yaml_node_["min_p"].as<float>();
if (yaml_node_["tfs_z"])
tmp.tfs_z = yaml_node_["tfs_z"].as<float>();
if (yaml_node_["typ_p"])
tmp.typ_p = yaml_node_["typ_p"].as<float>();
if (yaml_node_["repeat_last_n"])
tmp.repeat_last_n = yaml_node_["repeat_last_n"].as<int>();
if (yaml_node_["repeat_penalty"])
tmp.repeat_penalty = yaml_node_["repeat_penalty"].as<float>();
if (yaml_node_["mirostat"])
tmp.mirostat = yaml_node_["mirostat"].as<bool>();
if (yaml_node_["mirostat_tau"])
tmp.mirostat_tau = yaml_node_["mirostat_tau"].as<float>();
if (yaml_node_["mirostat_eta"])
tmp.mirostat_eta = yaml_node_["mirostat_eta"].as<float>();
if (yaml_node_["penalize_nl"])
tmp.penalize_nl = yaml_node_["penalize_nl"].as<bool>();
if (yaml_node_["ignore_eos"])
tmp.ignore_eos = yaml_node_["ignore_eos"].as<bool>();
if (yaml_node_["n_probs"])
tmp.n_probs = yaml_node_["n_probs"].as<int>();
if (yaml_node_["min_keep"])
tmp.min_keep = yaml_node_["min_keep"].as<int>();
if (yaml_node_["grammar"])
tmp.grammar = yaml_node_["grammar"].as<std::string>();
} catch (const std::exception& e) {
std::cerr << "Error when load model config : " << e.what() << std::endl;
std::cerr << "Revert ..." << std::endl;
@@ -185,6 +220,42 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
yaml_node_["stop"] = model_config_.stop;
if (model_config_.files.size() > 0)
yaml_node_["files"] = model_config_.files;

if (!std::isnan(static_cast<double>(model_config_.seed)))
yaml_node_["seed"] = model_config_.seed;
if (!std::isnan(model_config_.dynatemp_range))
yaml_node_["dynatemp_range"] = model_config_.dynatemp_range;
if (!std::isnan(model_config_.dynatemp_exponent))
yaml_node_["dynatemp_exponent"] = model_config_.dynatemp_exponent;
if (!std::isnan(static_cast<double>(model_config_.top_k)))
yaml_node_["top_k"] = model_config_.top_k;
if (!std::isnan(model_config_.min_p))
yaml_node_["min_p"] = model_config_.min_p;
if (!std::isnan(model_config_.tfs_z))
yaml_node_["tfs_z"] = model_config_.tfs_z;
if (!std::isnan(model_config_.typ_p))
yaml_node_["typ_p"] = model_config_.typ_p;
if (!std::isnan(static_cast<double>(model_config_.repeat_last_n)))
yaml_node_["repeat_last_n"] = model_config_.repeat_last_n;
if (!std::isnan(model_config_.repeat_penalty))
yaml_node_["repeat_penalty"] = model_config_.repeat_penalty;
if (!std::isnan(static_cast<double>(model_config_.mirostat)))
yaml_node_["mirostat"] = model_config_.mirostat;
if (!std::isnan(model_config_.mirostat_tau))
yaml_node_["mirostat_tau"] = model_config_.mirostat_tau;
if (!std::isnan(model_config_.mirostat_eta))
yaml_node_["mirostat_eta"] = model_config_.mirostat_eta;
if (!std::isnan(static_cast<double>(model_config_.penalize_nl)))
yaml_node_["penalize_nl"] = model_config_.penalize_nl;
if (!std::isnan(static_cast<double>(model_config_.ignore_eos)))
yaml_node_["ignore_eos"] = model_config_.ignore_eos;
if (!std::isnan(static_cast<double>(model_config_.n_probs)))
yaml_node_["n_probs"] = model_config_.n_probs;
if (!std::isnan(static_cast<double>(model_config_.min_keep)))
yaml_node_["min_keep"] = model_config_.min_keep;
if (!model_config_.grammar.empty())
yaml_node_["grammar"] = model_config_.grammar;

yaml_node_["created"] = std::time(nullptr);
} catch (const std::exception& e) {
std::cerr << "Error when update model config : " << e.what() << std::endl;
@@ -200,7 +271,97 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
if (!outFile) {
throw std::runtime_error("Failed to open output file.");
}
outFile << yaml_node_;
// Helper function to write a key-value pair with an optional comment
auto writeKeyValue = [&](const std::string& key, const YAML::Node& value,
const std::string& comment = "") {
if (!value)
return;
outFile << key << ": " << value;
if (!comment.empty()) {
outFile << " # " << comment;
}
outFile << "\n";
};

// Write GENERAL GGUF METADATA
outFile << "# BEGIN GENERAL GGUF METADATA\n";
writeKeyValue("id", yaml_node_["id"],
"Model ID unique between models (author / quantization)");
writeKeyValue("model", yaml_node_["model"],
"Model ID which is used for request construct - should be "
"unique between models (author / quantization)");
writeKeyValue("name", yaml_node_["name"], "metadata.general.name");
writeKeyValue("version", yaml_node_["version"], "metadata.version");
if (yaml_node_["files"] && yaml_node_["files"].size()) {
outFile << "files: # can be universal protocol (models://) "
"OR absolute local file path (file://) OR https remote URL "
"(https://)\n";
for (const auto& source : yaml_node_["files"]) {
outFile << " - " << source << "\n";
}
}

outFile << "# END GENERAL GGUF METADATA\n";
outFile << "\n";
// Write INFERENCE PARAMETERS
outFile << "# BEGIN INFERENCE PARAMETERS\n";
outFile << "# BEGIN REQUIRED\n";
if (yaml_node_["stop"] && yaml_node_["stop"].size()) {
outFile << "stop: # tokenizer.ggml.eos_token_id\n";
for (const auto& stop : yaml_node_["stop"]) {
outFile << " - " << stop << "\n";
}
}

outFile << "# END REQUIRED\n";
outFile << "\n";
outFile << "# BEGIN OPTIONAL\n";
writeKeyValue("stream", yaml_node_["stream"], "Default true?");
writeKeyValue("top_p", yaml_node_["top_p"], "Ranges: 0 to 1");
writeKeyValue("temperature", yaml_node_["temperature"], "Ranges: 0 to 1");
writeKeyValue("frequency_penalty", yaml_node_["frequency_penalty"],
"Ranges: 0 to 1");
writeKeyValue("presence_penalty", yaml_node_["presence_penalty"],
"Ranges: 0 to 1");
writeKeyValue("max_tokens", yaml_node_["max_tokens"],
"Should be default to context length");
writeKeyValue("seed", yaml_node_["seed"]);
writeKeyValue("dynatemp_range", yaml_node_["dynatemp_range"]);
writeKeyValue("dynatemp_exponent", yaml_node_["dynatemp_exponent"]);
writeKeyValue("top_k", yaml_node_["top_k"]);
writeKeyValue("min_p", yaml_node_["min_p"]);
writeKeyValue("tfs_z", yaml_node_["tfs_z"]);
writeKeyValue("typ_p", yaml_node_["typ_p"]);
writeKeyValue("repeat_last_n", yaml_node_["repeat_last_n"]);
writeKeyValue("repeat_penalty", yaml_node_["repeat_penalty"]);
writeKeyValue("mirostat", yaml_node_["mirostat"]);
writeKeyValue("mirostat_tau", yaml_node_["mirostat_tau"]);
writeKeyValue("mirostat_eta", yaml_node_["mirostat_eta"]);
writeKeyValue("penalize_nl", yaml_node_["penalize_nl"]);
writeKeyValue("ignore_eos", yaml_node_["ignore_eos"]);
writeKeyValue("n_probs", yaml_node_["n_probs"]);
writeKeyValue("min_keep", yaml_node_["min_keep"]);
writeKeyValue("grammar", yaml_node_["grammar"]);
outFile << "# END OPTIONAL\n";
outFile << "# END INFERENCE PARAMETERS\n";
outFile << "\n";
// Write MODEL LOAD PARAMETERS
outFile << "# BEGIN MODEL LOAD PARAMETERS\n";
outFile << "# BEGIN REQUIRED\n";
writeKeyValue("engine", yaml_node_["engine"], "engine to run model");
outFile << "prompt_template:";
outFile << " " << yaml_node_["prompt_template"] << "\n";
outFile << "# END REQUIRED\n";
outFile << "\n";
outFile << "# BEGIN OPTIONAL\n";
writeKeyValue("ctx_len", yaml_node_["ctx_len"],
"llama.context_length | 0 or undefined = loaded from model");
writeKeyValue("ngl", yaml_node_["ngl"], "Undefined = loaded from model");
outFile << "# END OPTIONAL\n";
outFile << "# END MODEL LOAD PARAMETERS\n";

// Write new configuration parameters

outFile.close();
} catch (const std::exception& e) {
std::cerr << "Error writing to file: " << e.what() << std::endl;
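To make the hand-rolled serializer concrete, below is a self-contained reduction of the `writeKeyValue` lambda writing to `std::cout` instead of the file stream. It relies on yaml-cpp's `operator<<` for `YAML::Node`, the same overload the replaced `outFile << yaml_node_;` line used:

```cpp
#include <iostream>
#include <string>
#include <yaml-cpp/yaml.h>

int main() {
  YAML::Node node = YAML::Load("engine: cortex.llamacpp\ntop_k: 40");

  auto writeKeyValue = [&](const std::string& key, const YAML::Node& value,
                           const std::string& comment = "") {
    if (!value) return;  // keys absent from the node are skipped entirely
    std::cout << key << ": " << value;
    if (!comment.empty()) std::cout << " # " << comment;
    std::cout << "\n";
  };

  writeKeyValue("engine", node["engine"], "engine to run model");
  writeKeyValue("top_k", node["top_k"]);
  writeKeyValue("min_p", node["min_p"]);  // absent: nothing written
  // Output:
  // engine: cortex.llamacpp # engine to run model
  // top_k: 40
  return 0;
}
```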
1 change: 0 additions & 1 deletion engine/controllers/command_line_parser.cc
@@ -294,7 +294,6 @@ void CommandLineParser::EngineGet(CLI::App* parent) {
std::string desc = "Get " + engine_name + " status";

auto engine_get_cmd = get_cmd->add_subcommand(engine_name, desc);
engine_get_cmd->require_option();
engine_get_cmd->callback(
[engine_name] { commands::EngineGetCmd().Exec(engine_name); });
}
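On the deleted line: CLI11's `require_option()` makes a subcommand demand at least one option, and `engine_get_cmd` registers none, so a bare `engines get <engine>` invocation would fail to parse; that is presumably why the call was removed. A minimal CLI11 sketch of the remaining shape (names illustrative, assumes CLI11 is available):

```cpp
#include <iostream>
#include <string>

#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
  CLI::App app{"cortex-like CLI sketch"};
  auto* get_cmd = app.add_subcommand("get", "Get engine status");

  const std::string engine_name = "cortex.llamacpp";
  auto* engine_get_cmd =
      get_cmd->add_subcommand(engine_name, "Get " + engine_name + " status");
  // No require_option(): the subcommand parses with zero options, so
  // `./prog get cortex.llamacpp` fires the callback directly.
  engine_get_cmd->callback(
      [engine_name] { std::cout << "status of " << engine_name << "\n"; });

  CLI11_PARSE(app, argc, argv);
  return 0;
}
```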
1 change: 1 addition & 0 deletions engine/e2e-test/test_cli_engine_get.py
@@ -52,5 +52,6 @@ def test_engines_get_onnx_should_be_incompatible_on_macos(self):
@pytest.mark.skipif(platform.system() != "Linux", reason="Linux-specific test")
def test_engines_get_onnx_should_be_incompatible_on_linux(self):
exit_code, output, error = run("Get engine", ["engines", "get", "cortex.onnx"])
print(output)
assert exit_code == 0, f"Get engine failed with error: {error}"
assert "Incompatible" in output, "cortex.onnx should be Incompatible on Linux"
2 changes: 1 addition & 1 deletion engine/e2e-test/test_cli_engine_install.py
@@ -31,7 +31,7 @@ def test_engines_install_onnx_on_tensorrt_should_be_failed(self):

def test_engines_install_pre_release_llamacpp(self):
exit_code, output, error = run(
"Install Engine", ["engines", "install", "cortex.llamacpp", "-v", "v0.1.29"], timeout=60
"Install Engine", ["engines", "install", "cortex.llamacpp", "-v", "v0.1.29"], timeout=None
)
assert "Start downloading" in output, "Should display downloading message"
assert exit_code == 0, f"Install engine failed with error: {error}"
2 changes: 1 addition & 1 deletion engine/e2e-test/test_cli_engine_uninstall.py
@@ -8,7 +8,7 @@ class TestCliEngineUninstall:
def setup_and_teardown(self):
# Setup
# Preinstall llamacpp engine
run("Install Engine", ["engines", "install", "cortex.llamacpp"])
run("Install Engine", ["engines", "install", "cortex.llamacpp"],timeout=None)

yield

5 changes: 3 additions & 2 deletions engine/test/components/CMakeLists.txt
@@ -3,13 +3,14 @@ project(test-components)

enable_testing()

add_executable(${PROJECT_NAME} ${SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/modellist_utils.cc)
add_executable(${PROJECT_NAME} ${SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/modellist_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../config/yaml_config.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../config/gguf_parser.cc)

find_package(Drogon CONFIG REQUIRED)
find_package(GTest CONFIG REQUIRED)
find_package(yaml-cpp CONFIG REQUIRED)
find_package(jinja2cpp CONFIG REQUIRED)

target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main yaml-cpp::yaml-cpp
target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main yaml-cpp::yaml-cpp jinja2cpp
${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)
