Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docs/docs/cli/models/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,11 @@ For example, it returns the following:

| Option | Description | Required | Default value | Example |
|---------------------------|----------------------------------------------------|----------|---------------|----------------------|
| `-h`, `--help` | Display help for command. | No | - | `-h` |
<!-- | `-f`, `--format <format>` | Specify output format for the models list. | No | `json` | `-f json` | -->
| `-h`, `--help` | Display help for command. | No | - | `-h` |
| `-e`, `--engine` | Display engines. | No | - | `--engine` |
| `-v`, `--version` | Display version for model. | No | - | `--version` |
| `--cpu_mode` | Display the recommended RAM (in MiB) for running each model in CPU mode. | No | - | `--cpu_mode` |
| `--gpu_mode` | Display the GPU mode recommendation for each model (ngl, context length, RAM, VRAM, recommended ngl). | No | - | `--gpu_mode` |

## `cortex models start`
:::info
Expand Down
53 changes: 53 additions & 0 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -3940,6 +3940,55 @@
},
"required": ["description", "name", "productName", "status"]
},
"CpuModeDto": {
"type": "object",
"properties": {
"ram": {
"type": "number",
"example": 1024
}
}
},
"GpuModeDto": {
"type": "object",
"properties": {
"ram": {
"type": "number",
"example": 1024
},
"vram": {
"type": "number",
"example": 1024
},
"ngl": {
"type": "number",
"example": 30
},
"context_length": {
"type": "number",
"example": 4096
},
"recommend_ngl": {
"type": "number",
"example": 33
}
}
},
"RecommendDto": {
"type": "object",
"properties": {
"cpu_mode": {
"type": "object",
"$ref": "#/components/schemas/CpuModeDto"
},
"gpu_mode": {
"type": "array",
"items": {
"$ref": "#/components/schemas/GpuModeDto"
}
}
}
},
"ModelDto": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -4064,6 +4113,10 @@
"type": "string",
"description": "The engine to use.",
"example": "llamacpp"
},
"recommendation": {
"type": "object",
"$ref": "#/components/schemas/RecommendDto"
}
},
"required": ["id"]
Expand Down
13 changes: 9 additions & 4 deletions engine/cli/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,19 @@ void CommandLineParser::SetupModelCommands() {
"Display engine");
list_models_cmd->add_flag("-v,--version", cml_data_.display_version,
"Display version");
list_models_cmd->add_flag("--cpu_mode", cml_data_.display_cpu_mode,
"Display cpu mode");
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
"Display gpu mode");
list_models_cmd->group(kSubcommands);
list_models_cmd->callback([this]() {
if (std::exchange(executed_, true))
return;
commands::ModelListCmd().Exec(cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort),
cml_data_.filter, cml_data_.display_engine,
cml_data_.display_version);
commands::ModelListCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
cml_data_.display_engine, cml_data_.display_version,
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
});

auto get_models_cmd =
Expand Down
2 changes: 2 additions & 0 deletions engine/cli/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class CommandLineParser {
// for model list
bool display_engine = false;
bool display_version = false;
bool display_cpu_mode = false;
bool display_gpu_mode = false;
std::string filter = "";
std::string log_level = "INFO";

Expand Down
31 changes: 30 additions & 1 deletion engine/cli/commands/model_list_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ using Row_t =

void ModelListCmd::Exec(const std::string& host, int port,
const std::string& filter, bool display_engine,
bool display_version) {
bool display_version, bool display_cpu_mode,
bool display_gpu_mode) {
// Start server if server is not started yet
if (!commands::IsServerAlive(host, port)) {
CLI_LOG("Starting server ...");
Expand All @@ -39,6 +40,12 @@ void ModelListCmd::Exec(const std::string& host, int port,
column_headers.push_back("Version");
}

if (display_cpu_mode) {
column_headers.push_back("CPU Mode");
}
if (display_gpu_mode) {
column_headers.push_back("GPU Mode");
}
Row_t header{column_headers.begin(), column_headers.end()};
table.add_row(header);
table.format().font_color(Color::green);
Expand Down Expand Up @@ -77,6 +84,28 @@ void ModelListCmd::Exec(const std::string& host, int port,
row.push_back(v["version"].asString());
}

if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
}
}
}

table.add_row({row.begin(), row.end()});
}
}
Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/model_list_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ namespace commands {
class ModelListCmd {
public:
void Exec(const std::string& host, int port, const std::string& filter,
bool display_engine = false, bool display_version = false);
bool display_engine = false, bool display_version = false,
bool display_cpu_mode = false, bool display_gpu_mode = false);
};
} // namespace commands
4 changes: 4 additions & 0 deletions engine/controllers/models.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ void Models::ListModel(
Json::Value obj = model_config.ToJson();
obj["id"] = model_entry.model;
obj["model"] = model_entry.model;
auto es = model_service_->GetEstimation(model_entry.model);
if (es.has_value()) {
obj["recommendation"] = hardware::ToJson(es.value());
}
data.append(std::move(obj));
yaml_handler.Reset();
} catch (const std::exception& e) {
Expand Down
1 change: 1 addition & 0 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "utils/event_processor.h"
#include "utils/file_logger.h"
#include "utils/file_manager_utils.h"
#include "utils/hardware/gguf/gguf_file_estimate.h"
#include "utils/logging_utils.h"
#include "utils/system_info_utils.h"
#include "utils/widechar_conv.h"
Expand Down
Loading
Loading