diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc index e535fa704..67ed651a4 100644 --- a/engine/commands/chat_cmd.cc +++ b/engine/commands/chat_cmd.cc @@ -1,8 +1,10 @@ #include "chat_cmd.h" #include "httplib.h" +#include "cortex_upd_cmd.h" #include "trantor/utils/Logger.h" #include "utils/logging_utils.h" +#include "server_start_cmd.h" namespace commands { namespace { @@ -33,6 +35,15 @@ ChatCmd::ChatCmd(std::string host, int port, const config::ModelConfig& mc) : host_(std::move(host)), port_(port), mc_(mc) {} void ChatCmd::Exec(std::string msg) { + // Check if server is started + { + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Server is not started yet, please run `" + << commands::GetCortexBinary() << " start` to start server!"); + return; + } + } + auto address = host_ + ":" + std::to_string(port_); // Check if model is loaded // TODO(sang) only llamacpp support modelstatus for now diff --git a/engine/commands/cortex_upd_cmd.cc b/engine/commands/cortex_upd_cmd.cc index 7c2a1f423..3c892f6fc 100644 --- a/engine/commands/cortex_upd_cmd.cc +++ b/engine/commands/cortex_upd_cmd.cc @@ -15,7 +15,7 @@ void CortexUpdCmd::Exec(std::string v) { { auto config = file_manager_utils::GetCortexConfig(); httplib::Client cli(config.apiServerHost + ":" + config.apiServerPort); - auto res = cli.Get("/health/healthz"); + auto res = cli.Get("/healthz"); if (res) { CLI_LOG("Server is running. 
Stopping server before updating!"); commands::ServerStopCmd ssc(config.apiServerHost, diff --git a/engine/commands/model_start_cmd.cc b/engine/commands/model_start_cmd.cc index 83d051891..2eb137dac 100644 --- a/engine/commands/model_start_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -1,7 +1,10 @@ #include "model_start_cmd.h" +#include "cortex_upd_cmd.h" #include "httplib.h" #include "nlohmann/json.hpp" +#include "server_start_cmd.h" #include "trantor/utils/Logger.h" +#include "utils/file_manager_utils.h" #include "utils/logging_utils.h" namespace commands { @@ -10,7 +13,15 @@ ModelStartCmd::ModelStartCmd(std::string host, int port, : host_(std::move(host)), port_(port), mc_(mc) {} bool ModelStartCmd::Exec() { + // Check if server is started + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Server is not started yet, please run `" + << commands::GetCortexBinary() << " start` to start server!"); + return false; + } + httplib::Client cli(host_ + ":" + std::to_string(port_)); + nlohmann::json json_data; if (mc_.files.size() > 0) { // TODO(sang) support multiple files diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 64bc50d6f..1fb3706d7 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -2,7 +2,13 @@ #include "chat_cmd.h" #include "cmd_info.h" #include "config/yaml_config.h" +#include "engine_install_cmd.h" +#include "httplib.h" +#include "model_pull_cmd.h" #include "model_start_cmd.h" +#include "server_start_cmd.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" namespace commands { @@ -15,7 +21,7 @@ void RunCmd::Exec() { // TODO should we clean all resource if something fails? // Check if model existed. 
If not, download it { - auto model_conf = model_service_.GetDownloadedModel(model_id_); + auto model_conf = model_service_.GetDownloadedModel(model_file + ".yaml"); if (!model_conf.has_value()) { model_service_.DownloadModel(model_id_); } @@ -35,6 +41,17 @@ void RunCmd::Exec() { } } + // Start server if it is not running + { + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if(!ssc.Exec(host_, port_)) { + return; + } + } + } + // Start model config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile( diff --git a/engine/commands/server_start_cmd.cc b/engine/commands/server_start_cmd.cc new file mode 100644 index 000000000..613554c83 --- /dev/null +++ b/engine/commands/server_start_cmd.cc @@ -0,0 +1,106 @@ +#include "server_start_cmd.h" +#include "commands/cortex_upd_cmd.h" +#include "httplib.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" + +namespace commands { + +namespace { +bool TryConnectToServer(const std::string& host, int port) { + constexpr const auto kMaxRetry = 3u; + auto count = 0u; + // Check if server is started + while (true) { + if (IsServerAlive(host, port)) + break; + // Wait for server up + std::this_thread::sleep_for(std::chrono::seconds(1)); + if (count++ == kMaxRetry) { + std::cerr << "Could not start server" << std::endl; + return false; + } + } + return true; +} +} // namespace + +ServerStartCmd::ServerStartCmd() {} + +bool ServerStartCmd::Exec(const std::string& host, int port) { +#if defined(_WIN32) || defined(_WIN64) + // Windows-specific code to create a new process + STARTUPINFO si; + PROCESS_INFORMATION pi; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + auto exe = commands::GetCortexBinary(); + std::string cmds = + cortex_utils::GetCurrentPath() + "/" + exe + " --start-server"; + // Create child process + if (!CreateProcess( + 
NULL, // No module name (use command line) + const_cast<char*>( + cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure + { + std::cout << "Could not start server: " << GetLastError() << std::endl; + return false; + } else { + if(!TryConnectToServer(host, port)) { + return false; + } + std::cout << "Server started" << std::endl; + } + +#else + // Unix-like system-specific code to fork a child process + pid_t pid = fork(); + + if (pid < 0) { + // Fork failed + std::cerr << "Could not start server: " << std::endl; + return false; + } else if (pid == 0) { + // No need to configure LD_LIBRARY_PATH for macOS +#if !defined(__APPLE__) || !defined(__MACH__) + const char* name = "LD_LIBRARY_PATH"; + auto data = getenv(name); + std::string v; + if (auto g = getenv(name); g) { + v += g; + } + CTL_INF("LD_LIBRARY_PATH: " << v); + auto data_path = file_manager_utils::GetCortexDataPath(); + auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/"; + auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/"; + auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; + setenv(name, new_v.c_str(), true); + CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); +#endif + auto exe = commands::GetCortexBinary(); + std::string p = cortex_utils::GetCurrentPath() + "/" + exe; + execl(p.c_str(), exe.c_str(), "--start-server", (char*)0); + } else { + // Parent process + if(!TryConnectToServer(host, port)) { + return false; + } + std::cout << "Server started" << std::endl; + } +#endif + return true; +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/server_start_cmd.h 
b/engine/commands/server_start_cmd.h new file mode 100644 index 000000000..cb74c5ebc --- /dev/null +++ b/engine/commands/server_start_cmd.h @@ -0,0 +1,21 @@ +#pragma once +#include <string> +#include "httplib.h" + +namespace commands { + +inline bool IsServerAlive(const std::string& host, int port) { + httplib::Client cli(host + ":" + std::to_string(port)); + auto res = cli.Get("/healthz"); + if (res && res->status == httplib::StatusCode::OK_200) { + return true; + } + return false; +} + +class ServerStartCmd { + public: + ServerStartCmd(); + bool Exec(const std::string& host, int port); +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 824f9dcc9..3046d1e70 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -13,6 +13,7 @@ #include "commands/model_start_cmd.h" #include "commands/model_stop_cmd.h" #include "commands/run_cmd.h" +#include "commands/server_start_cmd.h" #include "commands/server_stop_cmd.h" #include "config/yaml_config.h" #include "services/engine_service.h" @@ -174,6 +175,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { }); } + auto start_cmd = app_.add_subcommand("start", "Start the API server"); + int port = std::stoi(config.apiServerPort); + start_cmd->add_option("-p, --port", port, "Server port to listen"); + start_cmd->callback([&config, &port] { + if (port != stoi(config.apiServerPort)) { + CTL_INF("apiServerPort changed from " << config.apiServerPort << " to " + << port); + auto config_path = file_manager_utils::GetConfigurationPath(); + config.apiServerPort = std::to_string(port); + config_yaml_utils::DumpYamlConfig(config, config_path.string()); + } + commands::ServerStartCmd ssc; + ssc.Exec(config.apiServerHost, std::stoi(config.apiServerPort)); + }); + auto stop_cmd = app_.add_subcommand("stop", "Stop the API server"); stop_cmd->callback([&config] { @@ -208,6 +224,10 
@@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } CLI11_PARSE(app_, argc, argv); + if (argc == 1) { + CLI_LOG(app_.help()); + return true; + } // Check new update, only check for stable release for now #ifdef CORTEX_CPP_VERSION diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/test_runner.py index bedf8d39d..dd634d747 100644 --- a/engine/e2e-test/test_runner.py +++ b/engine/e2e-test/test_runner.py @@ -50,7 +50,10 @@ def start_server() -> bool: def start_server_nix() -> bool: executable = getExecutablePath() process = subprocess.Popen( - executable, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + [executable] + ['start', '-p', '3928'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True ) start_time = time.time() @@ -77,7 +80,7 @@ def start_server_nix() -> bool: def start_server_windows() -> bool: executable = getExecutablePath() process = subprocess.Popen( - executable, + [executable] + ['start', '-p', '3928'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, diff --git a/engine/main.cc b/engine/main.cc index 06513d638..bdac8148c 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -85,72 +85,6 @@ void RunServer() { // return 0; } -void ForkProcess() { -#if defined(_WIN32) || defined(_WIN64) - // Windows-specific code to create a new process - STARTUPINFO si; - PROCESS_INFORMATION pi; - - ZeroMemory(&si, sizeof(si)); - si.cb = sizeof(si); - ZeroMemory(&pi, sizeof(pi)); - auto exe = commands::GetCortexBinary(); - std::string cmds = - cortex_utils::GetCurrentPath() + "/" + exe + " --start-server"; - // Create child process - if (!CreateProcess( - NULL, // No module name (use command line) - const_cast<char*>( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory 
- &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure - { - std::cout << "Could not start server: " << GetLastError() << std::endl; - } else { - std::cout << "Server started" << std::endl; - } - -#else - // Unix-like system-specific code to fork a child process - pid_t pid = fork(); - - if (pid < 0) { - // Fork failed - std::cerr << "Could not start server: " << std::endl; - return; - } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto data_path = file_manager_utils::GetCortexDataPath(); - auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/"; - auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/"; - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif - auto exe = commands::GetCortexBinary(); - std::string p = cortex_utils::GetCurrentPath() + "/" + exe; - execl(p.c_str(), exe.c_str(), "--start-server", (char*)0); - } else { - // Parent process - std::cout << "Server started" << std::endl; - } -#endif -} - int main(int argc, char* argv[]) { // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); @@ -195,41 +129,34 @@ int main(int argc, char* argv[]) { } } - if (argc > 1) { - if (strcmp(argv[1], "--start-server") == 0) { - RunServer(); - return 0; - } else { - bool verbose = false; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "--verbose") == 0) { - verbose = true; - } - } + if (argc > 1 && strcmp(argv[1], "--start-server") == 0) { + RunServer(); + return 0; + } - trantor::FileLogger asyncFileLogger; - if (!verbose) { - auto config = file_manager_utils::GetCortexConfig(); - 
std::filesystem::create_directories( - std::filesystem::path(config.logFolderPath) / - std::filesystem::path(cortex_utils::logs_folder)); - asyncFileLogger.setFileName(config.logFolderPath + "/" + - cortex_utils::logs_cli_base_name); - asyncFileLogger.setMaxLines( - config.maxLogLines); // Keep last 100000 lines - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output_(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - } - CommandLineParser clp; - clp.SetupCommand(argc, argv); - return 0; + bool verbose = false; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "--verbose") == 0) { + verbose = true; } } - - ForkProcess(); + trantor::FileLogger asyncFileLogger; + if (!verbose) { + auto config = file_manager_utils::GetCortexConfig(); + std::filesystem::create_directories( + std::filesystem::path(config.logFolderPath) / + std::filesystem::path(cortex_utils::logs_folder)); + asyncFileLogger.setFileName(config.logFolderPath + "/" + + cortex_utils::logs_cli_base_name); + asyncFileLogger.setMaxLines(config.maxLogLines); // Keep last 100000 lines + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output_(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + } + CommandLineParser clp; + clp.SetupCommand(argc, argv); return 0; }