Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wire up graceful server shutdown #5244

Merged
merged 1 commit into from Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 22 additions & 1 deletion examples/server/server.cpp
Expand Up @@ -28,6 +28,7 @@
#include <chrono>
#include <condition_variable>
#include <atomic>
#include <signal.h>

using json = nlohmann::json;

Expand Down Expand Up @@ -2511,6 +2512,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
}
}

std::function<void(int)> shutdown_handler;
inline void signal_handler(int signal) { shutdown_handler(signal); }

int main(int argc, char **argv)
{
#if SERVER_VERBOSE != 1
Expand Down Expand Up @@ -3128,8 +3132,25 @@ int main(int argc, char **argv)
std::placeholders::_2,
std::placeholders::_3
));
llama.queue_tasks.start_loop();

shutdown_handler = [&](int) {
llama.queue_tasks.terminate();
};

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
sigint_action.sa_handler = signal_handler;
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
#elif defined (_WIN32)
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
};
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
#endif
llama.queue_tasks.start_loop();
svr.stop();
t.join();

llama_backend_free();
Expand Down
20 changes: 17 additions & 3 deletions examples/server/utils.hpp
Expand Up @@ -220,6 +220,7 @@ inline std::string format_chatml(std::vector<json> messages)
struct llama_server_queue {
int id = 0;
std::mutex mutex_tasks;
bool running;
// queues
std::vector<task_server> queue_tasks;
std::vector<task_server> queue_tasks_deferred;
Expand Down Expand Up @@ -278,9 +279,18 @@ struct llama_server_queue {
queue_tasks_deferred.clear();
}

// Start the main loop. This call is blocking
[[noreturn]]
// end the start_loop routine
void terminate() {
{
std::unique_lock<std::mutex> lock(mutex_tasks);
running = false;
}
condition_tasks.notify_all();
}

// Start the main loop.
void start_loop() {
running = true;
while (true) {
// new task arrived
LOG_VERBOSE("have new task", {});
Expand Down Expand Up @@ -324,8 +334,12 @@ struct llama_server_queue {
{
std::unique_lock<std::mutex> lock(mutex_tasks);
if (queue_tasks.empty()) {
if (!running) {
LOG_VERBOSE("ending start_loop", {});
return;
}
condition_tasks.wait(lock, [&]{
return !queue_tasks.empty();
return (!queue_tasks.empty() || !running);
});
}
}
Expand Down