diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 7b0153f69..743b39cdc 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,11 +7,10 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/cortex_utils.h" #include "utils/cuda_toolkit_utils.h" #include "utils/engine_matcher_utils.h" -#if defined(_WIN32) || defined(__linux__) #include "utils/file_manager_utils.h" -#endif namespace commands { @@ -125,32 +124,6 @@ bool EngineInitCmd::Exec() const { archive_utils::ExtractArchive(downloadedEnginePath.string(), extract_path.string()); -#if defined(_WIN32) || defined(__linux__) - // FIXME: hacky try to copy the file. Remove this when we are able to set the library path - auto engine_path = extract_path / engineName_; - LOG_INFO << "Source path: " << engine_path.string(); - auto executable_path = - file_manager_utils::GetExecutableFolderContainerPath(); - for (const auto& entry : - std::filesystem::recursive_directory_iterator(engine_path)) { - if (entry.is_regular_file() && - entry.path().extension() != ".gz") { - std::filesystem::path relative_path = - std::filesystem::relative(entry.path(), engine_path); - std::filesystem::path destFile = - executable_path / relative_path; - - std::filesystem::create_directories(destFile.parent_path()); - std::filesystem::copy_file( - entry.path(), destFile, - std::filesystem::copy_options::overwrite_existing); - - std::cout << "Copied: " << entry.path().filename().string() - << " to " << destFile.string() << std::endl; - } - } - std::cout << "DLL copying completed successfully." << std::endl; -#endif // remove the downloaded file // TODO(any) Could not delete file on Windows because it is currently hold by httplib(?) 
@@ -224,15 +197,23 @@ bool EngineInitCmd::Exec() const { download_service.AddDownloadTask( downloadCudaToolkitTask, - [](const std::string& absolute_path, bool unused) { + [this](const std::string& absolute_path, bool unused) { LOG_DEBUG << "Downloaded cuda path: " << absolute_path; // try to unzip the downloaded file std::filesystem::path downloaded_path{absolute_path}; - - archive_utils::ExtractArchive( - absolute_path, - downloaded_path.parent_path().parent_path().string()); - + // TODO(any) This is a temporary fix. The issue will be fixed when we have CIs + // to pack CUDA dependencies into engine release + auto get_engine_path = [](std::string_view e) { + if (e == "cortex.llamacpp") { + return cortex_utils::kLlamaLibPath; + } else { + return cortex_utils::kTensorrtLlmPath; + } + }; + std::string engine_path = + file_manager_utils::GetCortexDataPath().string() + + get_engine_path(engineName_); + archive_utils::ExtractArchive(absolute_path, engine_path); try { std::filesystem::remove(absolute_path); } catch (std::exception& e) { diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 42a7595be..ee9951968 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -7,8 +7,8 @@ #include "trantor/utils/Logger.h" #include "utils/cortex_utils.h" #include "utils/cpuid/cpu_info.h" -#include "utils/logging_utils.h" #include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" using namespace inferences; using json = nlohmann::json; @@ -21,11 +21,9 @@ constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm"; } // namespace server::server() { - - // Some default values for now below - // log_disable(); // Disable the log to file feature, reduce bloat for - // target - // system () +#if defined(_WIN32) + SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS); +#endif }; server::~server() {} @@ -290,10 +288,48 @@ void server::LoadModel(const HttpRequestPtr& req, } std::string abs_path = - (getenv("ENGINE_PATH") 
? getenv("ENGINE_PATH") : file_manager_utils::GetCortexDataPath().string()) + + (getenv("ENGINE_PATH") + ? getenv("ENGINE_PATH") + : file_manager_utils::GetCortexDataPath().string()) + get_engine_path(engine_type); - std::cout << abs_path << std::endl; +#if defined(_WIN32) + // TODO(?) If we only allow to load an engine at a time, the logic is simpler. + // We would like to support running multiple engines at the same time. Therefore, + // the adding/removing dll directory logic is quite complicated: + // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: + // Unload the llamacpp dll directory then load the tensorrt-llm + // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: + // Do nothing, llamacpp can re-use tensorrt-llm dependencies (needs to be tested carefully) + // 3. Add dll directory if met other conditions + + auto add_dll = [this](const std::string& e_type, const std::string& p) { + auto ws = std::wstring(p.begin(), p.end()); + if (auto cookie = AddDllDirectory(ws.c_str()); cookie != 0) { + LOG_INFO << "Added dll directory: " << p; + engines_[e_type].cookie = cookie; + } else { + LOG_WARN << "Could not add dll directory: " << p; + } + }; + + if (IsEngineLoaded(kLlamaEngine) && engine_type == kTensorrtLlmEngine) { + // Remove llamacpp dll directory + if (!RemoveDllDirectory(engines_[kLlamaEngine].cookie)) { + LOG_WARN << "Could not remove dll directory: " << kLlamaEngine; + } else { + LOG_INFO << "Removed dll directory: " << kLlamaEngine; + // Clear the released cookie so it is never passed to RemoveDllDirectory again. + engines_[kLlamaEngine].cookie = nullptr; + } + + add_dll(engine_type, abs_path); + } else if (IsEngineLoaded(kTensorrtLlmEngine) && + engine_type == kLlamaEngine) { + // Do nothing + } else { + add_dll(engine_type, abs_path); + } +#endif engines_[engine_type].dl = std::make_unique(abs_path, "engine"); @@ -349,6 +385,16 @@ void server::UnloadEngine( EngineI* e = std::get(engines_[engine_type].engine); delete e; +#if defined(_WIN32) + // A null cookie means no dll directory is currently registered for this engine. + if (engines_[engine_type].cookie == nullptr) { + LOG_DEBUG << "No dll directory to remove: " << engine_type; + } else if (!RemoveDllDirectory(engines_[engine_type].cookie)) { + LOG_WARN << "Could not remove dll 
directory: " << engine_type; + } else { + LOG_INFO << "Removed dll directory: " << engine_type; + } +#endif engines_.erase(engine_type); LOG_INFO << "Unloaded engine " + engine_type; Json::Value res; diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 6d811192d..58d112435 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -17,11 +17,11 @@ #include #include "common/base.h" +#include "config/gguf_parser.h" +#include "config/yaml_config.h" #include "cortex-common/EngineI.h" #include "cortex-common/cortexpythoni.h" #include "trantor/utils/SerialTaskQueue.h" -#include "config/yaml_config.h" -#include "config/gguf_parser.h" #include "utils/dylib.h" #include "utils/json.hpp" #ifndef SERVER_VERBOSE @@ -153,6 +153,10 @@ class server : public drogon::HttpController, struct EngineInfo { std::unique_ptr dl; EngineV engine; +#if defined(_WIN32) + // Cookie stored from AddDllDirectory; null while no dll directory is registered. + DLL_DIRECTORY_COOKIE cookie = nullptr; +#endif }; std::unordered_map engines_; std::string cur_engine_type_;