Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 15 additions & 34 deletions engine/commands/engine_init_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
#include "utils/archive_utils.h"
#include "utils/system_info_utils.h"
// clang-format on
#include "utils/cortex_utils.h"
#include "utils/cuda_toolkit_utils.h"
#include "utils/engine_matcher_utils.h"
#if defined(_WIN32) || defined(__linux__)
#include "utils/file_manager_utils.h"
#endif

namespace commands {

Expand Down Expand Up @@ -125,32 +124,6 @@ bool EngineInitCmd::Exec() const {

archive_utils::ExtractArchive(downloadedEnginePath.string(),
extract_path.string());
#if defined(_WIN32) || defined(__linux__)
// FIXME: hacky try to copy the file. Remove this when we are able to set the library path
auto engine_path = extract_path / engineName_;
LOG_INFO << "Source path: " << engine_path.string();
auto executable_path =
file_manager_utils::GetExecutableFolderContainerPath();
for (const auto& entry :
std::filesystem::recursive_directory_iterator(engine_path)) {
if (entry.is_regular_file() &&
entry.path().extension() != ".gz") {
std::filesystem::path relative_path =
std::filesystem::relative(entry.path(), engine_path);
std::filesystem::path destFile =
executable_path / relative_path;

std::filesystem::create_directories(destFile.parent_path());
std::filesystem::copy_file(
entry.path(), destFile,
std::filesystem::copy_options::overwrite_existing);

std::cout << "Copied: " << entry.path().filename().string()
<< " to " << destFile.string() << std::endl;
}
}
std::cout << "DLL copying completed successfully." << std::endl;
#endif

// remove the downloaded file
// TODO(any) Could not delete file on Windows because it is currently hold by httplib(?)
Expand Down Expand Up @@ -224,15 +197,23 @@ bool EngineInitCmd::Exec() const {

download_service.AddDownloadTask(
downloadCudaToolkitTask,
[](const std::string& absolute_path, bool unused) {
[this](const std::string& absolute_path, bool unused) {
LOG_DEBUG << "Downloaded cuda path: " << absolute_path;
// try to unzip the downloaded file
std::filesystem::path downloaded_path{absolute_path};

archive_utils::ExtractArchive(
absolute_path,
downloaded_path.parent_path().parent_path().string());

// TODO(any): This is a temporary fix. It can be removed once CI packs the
// CUDA dependencies into the engine release.
// Maps an engine name to its library sub-path: "cortex.llamacpp" selects the
// llama.cpp path, every other name selects the TensorRT-LLM path.
auto get_engine_path = [](std::string_view e) {
  return e == "cortex.llamacpp" ? cortex_utils::kLlamaLibPath
                                : cortex_utils::kTensorrtLlmPath;
};
std::string engine_path =
file_manager_utils::GetCortexDataPath().string() +
get_engine_path(engineName_);
archive_utils::ExtractArchive(absolute_path, engine_path);
try {
std::filesystem::remove(absolute_path);
} catch (std::exception& e) {
Expand Down
59 changes: 50 additions & 9 deletions engine/controllers/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include "trantor/utils/Logger.h"
#include "utils/cortex_utils.h"
#include "utils/cpuid/cpu_info.h"
#include "utils/logging_utils.h"
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"

using namespace inferences;
using json = nlohmann::json;
Expand All @@ -21,11 +21,9 @@ constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm";
} // namespace

// Constructs the server controller.
//
// On Windows this switches the process to the default DLL search directories
// (LOAD_LIBRARY_SEARCH_DEFAULT_DIRS). A failure is logged rather than ignored,
// since it affects how engine libraries loaded later resolve their
// dependencies. On other platforms the constructor does nothing.
server::server() {
#if defined(_WIN32)
  if (!SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS)) {
    LOG_WARN << "Could not set default dll directories";
  }
#endif
}

// Nothing to release explicitly; members clean up after themselves.
server::~server() = default;
Expand Down Expand Up @@ -290,10 +288,46 @@ void server::LoadModel(const HttpRequestPtr& req,
}

std::string abs_path =
(getenv("ENGINE_PATH") ? getenv("ENGINE_PATH")
: file_manager_utils::GetCortexDataPath().string()) +
(getenv("ENGINE_PATH")
? getenv("ENGINE_PATH")
: file_manager_utils::GetCortexDataPath().string()) +
get_engine_path(engine_type);
std::cout << abs_path << std::endl;
#if defined(_WIN32)
// TODO(?) If we only allowed one engine to be loaded at a time, the logic would be simpler.
// We would like to support running multiple engines at the same time. Therefore,
// the logic for adding/removing dll directories is quite complicated:
// 1. If llamacpp is loaded and the newly requested engine is tensorrt-llm:
// Remove the llamacpp dll directory, then add the tensorrt-llm one
// 2. If tensorrt-llm is loaded and the newly requested engine is llamacpp:
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (needs to be tested carefully)
// 3. In all other cases, add the dll directory of the requested engine
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I understand this in more detail:

  • We are always loading the dll directory of the latest used engine
  • Is this due to our decision to keep per-engine dependencies?
  • Are we able to load dlls on an as-needed basis, without removing the "older" engine dlls?

I imagine this will come up often in demos - we will demo:

cortex run llama3.1
cortex run llama3.1:tensorrt-llm
cortex run llama3.1:onnx

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only load the engine's dll directory if the engine is not already loaded. The tensorrt-llm and llamacpp case is a little bit tricky because their CUDA dependencies may conflict, so we need to remove the old dll directory before adding the new one.


// Registers directory `p` as a dll search directory and, on success, stores
// the returned cookie on the engine entry `e_type` so it can be removed later.
// A failure is only logged; the engine entry's cookie is left untouched.
auto add_dll = [this](const std::string& e_type, const std::string& p) {
  // AddDllDirectory takes a wide string; the path is widened char by char.
  // NOTE(review): this looks correct only for ASCII paths - confirm whether
  // non-ASCII install locations need a real narrow-to-wide conversion.
  const std::wstring wide_dir(p.begin(), p.end());
  const auto dir_cookie = AddDllDirectory(wide_dir.c_str());
  if (dir_cookie == 0) {
    LOG_WARN << "Could not add dll directory: " << p;
    return;
  }
  LOG_INFO << "Added dll directory: " << p;
  engines_[e_type].cookie = dir_cookie;
};

// Decide which dll directory has to be registered before the engine library
// itself is loaded.
if (IsEngineLoaded(kLlamaEngine) && engine_type == kTensorrtLlmEngine) {
  // llamacpp is loaded and tensorrt-llm is requested: drop the llamacpp dll
  // directory first, then register the tensorrt-llm one.
  auto& llama_cookie = engines_[kLlamaEngine].cookie;
  if (RemoveDllDirectory(llama_cookie)) {
    LOG_INFO << "Removed dll directory: " << kLlamaEngine;
    // The directory is gone, so forget its cookie; otherwise unloading
    // llamacpp later would hand the same cookie to RemoveDllDirectory again.
    llama_cookie = DLL_DIRECTORY_COOKIE{};
  } else {
    LOG_WARN << "Could not remove dll directory: " << kLlamaEngine;
  }

  add_dll(engine_type, abs_path);
} else if (IsEngineLoaded(kTensorrtLlmEngine) &&
           engine_type == kLlamaEngine) {
  // tensorrt-llm is loaded and llamacpp is requested: nothing is registered
  // for llamacpp in this case.
} else {
  add_dll(engine_type, abs_path);
}
#endif
engines_[engine_type].dl =
std::make_unique<cortex_cpp::dylib>(abs_path, "engine");

Expand Down Expand Up @@ -349,6 +383,13 @@ void server::UnloadEngine(

EngineI* e = std::get<EngineI*>(engines_[engine_type].engine);
delete e;
#if defined(_WIN32)
if (!RemoveDllDirectory(engines_[engine_type].cookie)) {
LOG_WARN << "Could not remove dll directory: " << engine_type;
} else {
LOG_INFO << "Removed dll directory: " << engine_type;
}
#endif
engines_.erase(engine_type);
LOG_INFO << "Unloaded engine " + engine_type;
Json::Value res;
Expand Down
7 changes: 5 additions & 2 deletions engine/controllers/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
#include <variant>

#include "common/base.h"
#include "config/gguf_parser.h"
#include "config/yaml_config.h"
#include "cortex-common/EngineI.h"
#include "cortex-common/cortexpythoni.h"
#include "trantor/utils/SerialTaskQueue.h"
#include "config/yaml_config.h"
#include "config/gguf_parser.h"
#include "utils/dylib.h"
#include "utils/json.hpp"
#ifndef SERVER_VERBOSE
Expand Down Expand Up @@ -153,6 +153,9 @@ class server : public drogon::HttpController<server>,
// Bookkeeping for one loaded engine, stored in engines_ keyed by engine type.
struct EngineInfo {
  // The dynamically loaded engine library.
  std::unique_ptr<cortex_cpp::dylib> dl;
  // The engine instance obtained from that library.
  EngineV engine;
#if defined(_WIN32)
  // Cookie returned by AddDllDirectory for this engine's directory. It is
  // value-initialized so an entry that never registered a directory holds a
  // well-defined zero cookie instead of an indeterminate one.
  DLL_DIRECTORY_COOKIE cookie{};
#endif
};
std::unordered_map<std::string, EngineInfo> engines_;
std::string cur_engine_type_;
Expand Down