Skip to content

Commit

Permalink
[LibOS] Wake up thread that can handle SIGTERM on host-injected SIGTERM
Browse files Browse the repository at this point in the history
Some apps (e.g. MongoDB) create a separate thread that has a single job
-- to wait for a SIGTERM signal and then perform graceful app
termination. This special thread issues `sigtimedwait(SIGTERM)` and
waits forever for this signal. Other threads instead block this signal
(add it to their sigmask), so that they don't need to care about
reacting to this signal.

Previously, our LibOS would try to handle SIGTERM on whichever thread the
host Linux delivered it to (the host chooses this thread arbitrarily). In
such apps, with high probability this is one of the threads that block
SIGTERM rather than the special waiting thread, so the app would
effectively ignore SIGTERM.

Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
  • Loading branch information
dimakuv committed Feb 12, 2024
1 parent f35d8e0 commit 339ce59
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 25 deletions.
3 changes: 3 additions & 0 deletions libos/include/libos_signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ int init_signal_handling(void);

int append_signal(struct libos_thread* thread, siginfo_t* info);

/* callback for walk_thread_list() */
int wakeup_one_thread_on_signal(struct libos_thread* thread, void* arg);

/*!
* \brief Pop any of the pending signals allowed in \p mask.
*
Expand Down
32 changes: 32 additions & 0 deletions libos/src/bookkeep/libos_signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,23 @@ uintptr_t get_stack_for_sighandler(uintptr_t sp, bool use_altstack) {
return (uintptr_t)alt_stack->ss_sp + alt_stack->ss_size;
}

/*
 * Callback for walk_thread_list(): wake up `thread` if it does not have the signal passed in `arg`
 * in its signal mask.
 *
 * `arg` carries the signal number cast to a pointer (callers pass e.g. `(void*)(long)sig`).
 *
 * Returns:
 *   0   - `thread` is the calling thread, or the signal is in the thread's signal mask;
 *   1   - the thread was woken up and resumed ("1" finishes a one-shot thread walk);
 *   < 0 - PalThreadResume() failed; the PAL error is converted with pal_to_unix_errno().
 */
int wakeup_one_thread_on_signal(struct libos_thread* thread, void* arg) {
    int signo = (int)(long)arg;
    int result = 0;

    /* the calling thread is never a wake-up candidate */
    if (thread == get_cur_thread()) {
        return result;
    }

    /* the signal mask is inspected (and the thread woken) under the thread's lock */
    lock(&thread->lock);
    if (!__sigismember(&thread->signal_mask, signo)) {
        thread_wakeup(thread);

        int pal_ret = PalThreadResume(thread->pal_handle);
        if (pal_ret < 0) {
            result = pal_to_unix_errno(pal_ret);
        } else {
            result = 1; /* "1" to finish one-shot thread walk */
        }
    }
    unlock(&thread->lock);

    return result;
}

void pop_unblocked_signal(__sigset_t* mask, struct libos_signal* signal) {
assert(signal);
signal->siginfo.si_signo = 0;
Expand Down Expand Up @@ -696,14 +713,29 @@ void pop_unblocked_signal(__sigset_t* mask, struct libos_signal* signal) {
} else if (__atomic_load_n(&g_host_injected_signal, __ATOMIC_RELAXED) != 0) {
static_assert(SIGS_CNT < 0xff, "This code requires 0xff to be an invalid signal number");
lock(&current->lock);
bool sigterm_allowed_on_this_thread = false;
if (!__sigismember(mask ? : &current->signal_mask, SIGTERM)) {
sigterm_allowed_on_this_thread = true;
int sig = __atomic_exchange_n(&g_host_injected_signal, 0xff, __ATOMIC_RELAXED);
if (sig != 0xff) {
signal->siginfo.si_signo = sig;
signal->siginfo.si_code = SI_USER;
}
}
unlock(&current->lock);

if (!sigterm_allowed_on_this_thread) {
/* host delivered SIGTERM on the current thread but this thread blocked SIGTERM, need to
* find another thread that didn't block this signal and wake it up (this covers a
* common case of one dedicated app thread doing sigtimedwait(SIGTERM) while other
* threads mark SIGTERM as blocked) */
int ret = walk_thread_list(wakeup_one_thread_on_signal, /*arg=*/(void*)SIGTERM,
/*one_shot=*/true);
if (ret < 0 && ret != -ESRCH) {
log_error("error occured while trying to deliver SIGTERM signal to a thread (%s)",
unix_strerror(ret));
}
}
}
}

Expand Down
26 changes: 1 addition & 25 deletions libos/src/sys/libos_sigaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,30 +304,6 @@ long libos_syscall_rt_sigpending(__sigset_t* set, size_t sigsetsize) {
return 0;
}

/*
 * Thread-list callback (passed to walk_thread_list() by kill_current_proc()): wakes up `thread`
 * unless it is the calling thread or has the signal in its signal mask.
 *
 * `arg` is the signal number cast to a pointer. Returns 0 when the thread is skipped, 1 when it
 * was woken up and resumed, or a negative errno value converted from the PalThreadResume() error.
 */
static int _wakeup_one_thread(struct libos_thread* thread, void* arg) {
    int signo = (int)(long)arg;

    /* skip the calling thread */
    if (thread == get_cur_thread())
        return 0;

    int status = 0;

    lock(&thread->lock);
    if (!__sigismember(&thread->signal_mask, signo)) {
        /* this thread does not block the signal: wake it up and resume it */
        thread_wakeup(thread);
        int pal_ret = PalThreadResume(thread->pal_handle);
        status = pal_ret < 0 ? pal_to_unix_errno(pal_ret) : 1;
    }
    unlock(&thread->lock);

    return status;
}

int kill_current_proc(siginfo_t* info) {
if (!info->si_signo) {
return 0;
Expand All @@ -351,7 +327,7 @@ int kill_current_proc(siginfo_t* info) {
unlock(&current->lock);
}

ret = walk_thread_list(_wakeup_one_thread, (void*)(long)sig, /*one_shot=*/true);
ret = walk_thread_list(wakeup_one_thread_on_signal, (void*)(long)sig, /*one_shot=*/true);
/* Ignore `-ESRCH` as this just means that currently no thread is able to handle the signal. */
if (ret < 0 && ret != -ESRCH) {
return ret;
Expand Down
1 change: 1 addition & 0 deletions libos/test/regression/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ tests = {
'sighandler_sigpipe': {},
'signal_multithread': {},
'sigprocmask_pending': {},
'sigterm_multithread': {},
'socket_ioctl': {},
'spinlock': {
'include_directories': include_directories(
Expand Down
64 changes: 64 additions & 0 deletions libos/test/regression/sigterm_multithread.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2024 Intel Corporation */

#define _XOPEN_SOURCE 700
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "common.h"

static bool thread_started = false;

/* Terminate the test with exit status 1 if a pthread call reported a non-zero result `x`. */
static void pthread_check(int x) {
    if (x == 0)
        return;

    errx(1, "pthread failed with %d", x);
}

/*
 * Set the calling thread's signal mask to exactly { SIGTERM }.
 *
 * Despite the name, SIGTERM is *blocked* (kept pending), not ignored; any previously blocked
 * signals are unblocked because the mask is replaced via SIG_SETMASK.
 *
 * This function is called both from main() and from the helper thread, so it uses
 * pthread_sigmask(): POSIX leaves the behavior of sigprocmask() unspecified in a multi-threaded
 * process. pthread_sigmask() returns an error number (not -1/errno), hence the explicit check.
 * Terminates the test with exit status 1 on failure.
 */
static void ignore_sigterm(void) {
    sigset_t blocked;
    sigemptyset(&blocked);
    sigaddset(&blocked, SIGTERM);

    int ret = pthread_sigmask(SIG_SETMASK, &blocked, NULL);
    if (ret)
        errx(1, "pthread_sigmask failed with %d", ret);
}

/*
 * Helper thread: adjusts its signal mask via ignore_sigterm(), announces itself through the
 * `thread_started` flag, and then waits synchronously for SIGTERM with sigwaitinfo().
 *
 * Never returns: the whole process exits with status 0 once SIGTERM is received, or with
 * status 1 (via errx) if sigwaitinfo() returns anything else.
 */
static void* thread_func(void* arg) {
    ignore_sigterm();

    /* let main() know that this thread is up; main() spins on this flag */
    __atomic_store_n(&thread_started, true, __ATOMIC_SEQ_CST);

    sigset_t sigterm_only;
    sigemptyset(&sigterm_only);
    sigaddset(&sigterm_only, SIGTERM);

    int signo = sigwaitinfo(&sigterm_only, /*info=*/NULL);
    if (signo != SIGTERM) {
        errx(1, "expected SIGTERM but sigwaitinfo returned %d", signo);
    }

    exit(0);
}

/*
 * Test driver: sets the main thread's signal mask via ignore_sigterm(), spawns the helper thread
 * that waits for SIGTERM, prints "READY" once the helper has started, and then keeps sleeping in
 * short intervals forever. The process is expected to be terminated by the helper thread calling
 * exit().
 */
int main(int argc, char** argv) {
    ignore_sigterm();

    pthread_t helper;
    pthread_check(pthread_create(&helper, NULL, thread_func, NULL));

    /* busy-wait until the helper thread has set the `thread_started` flag */
    for (;;) {
        if (__atomic_load_n(&thread_started, __ATOMIC_SEQ_CST))
            break;
    }

    /* helper thread has started and is about to wait for SIGTERM; inform the wrapper shell
     * script */
    puts("READY");
    fflush(stdout);

    /* emulate some processing; note that we can't use something like `pause()` because in this
     * case, both threads would wait in blocking host syscalls indefinitely, and Gramine currently
     * has a limitation that signals are delivered when some thread returns from syscall to the
     * app */
    for (;;) {
        struct timespec pause_1ms = { .tv_sec = 0, .tv_nsec = 1000 * 1000 }; /* 1ms */
        nanosleep(&pause_1ms, NULL);
    }
    return 0;
}
21 changes: 21 additions & 0 deletions libos/test/regression/sigterm_multithread.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "{{ entrypoint }}"

loader.env.LD_LIBRARY_PATH = "/lib"

sys.enable_sigterm_injection = true

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" },
{ path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" },
]

sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

sgx.trusted_files = [
"file:{{ gramine.libos }}",
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
]
4 changes: 4 additions & 0 deletions libos/test/regression/test_libos.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ def test_403_signalexit_multithread(self):
with self.expect_returncode(134):
self.run_binary(['abort_multithread'])

def test_404_sigterm_multithread(self):
stdout, _ = self.run_binary(['sigterm_multithread'], prefix=['./test_sigterm.sh'])
self.assertIn('SHELL OK', stdout)

def test_404_sigprocmask_pending(self):
stdout, _ = self.run_binary(['sigprocmask_pending'], timeout=60)
self.assertIn('Child OK', stdout)
Expand Down
24 changes: 24 additions & 0 deletions libos/test/regression/test_sigterm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/sh

# Wrapper for SIGTERM-injection tests.
#
# Usage: test_sigterm.sh <command> [args...]
#
# Starts the given command in the background, waits until it prints a line containing "READY" on
# its stdout, sends SIGTERM to it, waits for it to exit and finally prints "SHELL OK".
# Because of `set -e`, a failing step (including a non-zero exit status of the command reported
# by `wait`) aborts the script before "SHELL OK" is printed.

set -e

# the command's stdout is read through a named pipe so that this script can watch for "READY"
rm -f tmp/shell_fifo
mkfifo tmp/shell_fifo

# "$@" is quoted so that arguments containing whitespace or glob characters are passed through
# unchanged. Redirection order matters: stderr is first duplicated onto this script's stdout, and
# only then is the command's stdout pointed at the FIFO.
"$@" 2>&1 >tmp/shell_fifo &
pid=$!

# consume the command's stdout line by line until the READY marker shows up; `-r` keeps
# backslashes in the output from being interpreted
while read -r line; do
    case "$line" in
        *READY*)
            break
            ;;
        *)
            ;;
    esac
done <tmp/shell_fifo

kill -TERM "$pid"
wait "$pid"

echo "SHELL OK"
1 change: 1 addition & 0 deletions libos/test/regression/tests.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ manifests = [
"sighandler_sigpipe",
"signal_multithread",
"sigprocmask_pending",
"sigterm_multithread",
"socket_ioctl",
"spinlock",
"stat_invalid_args",
Expand Down
1 change: 1 addition & 0 deletions libos/test/regression/tests_musl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ manifests = [
"sighandler_sigpipe",
"signal_multithread",
"sigprocmask_pending",
"sigterm_multithread",
"socket_ioctl",
"spinlock",
"stat_invalid_args",
Expand Down

0 comments on commit 339ce59

Please sign in to comment.