Skip to content

Commit

Permalink
Add option to abort VMM when parent process dies
Browse files Browse the repository at this point in the history
If Firecracker is being monitored by a parent process that unexpectedly
terminates, it will be abandoned up the process tree, likely to
a process that doesn't know what do with it (such as init). This becomes
even trickier if the process was running in a mount namespace that was
controlled by the parent process, as the API socket is now inaccessible.

If the parent process was also keeping handles on other resources used by the
Firecracker VMM, these could be re-used by new processes and cause
conflicts with the now orphaned Firecracker.

This adds a flag to set the parent death signal (SIGUSR2 in this
instance) that the process will receive when its parent process exits
before the VMM does. Receipt of this signal will cause the VMM to
abruptly abort, much like the SIGILL signal. While a graceful shutdown
would be preferable, since the parent process may have been controlling
outside resources for Firecracker (disks, networking, etc.), it's
indeterminate whether or not it is safe to continue running the VM.

Signed-off-by: Josh Seba <jseba@cloudflare.com>
  • Loading branch information
jseba committed Oct 12, 2023
1 parent 5530e4c commit c1164ce
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 4 deletions.
8 changes: 7 additions & 1 deletion src/firecracker/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,11 @@ fn main_exec() -> Result<(), MainError> {
Argument::new("mmds-size-limit")
.takes_value(true)
.help("Mmds data store limit, in bytes."),
)
.arg(
Argument::new("no-outlive-parent")
.takes_value(false)
.help("Whether to abort the VMM when the parent process dies."),
);

arg_parser.parse_from_cmdline()?;
Expand All @@ -263,7 +268,8 @@ fn main_exec() -> Result<(), MainError> {
return Ok(());
}

register_signal_handlers().map_err(MainError::RegisterSignalHandlers)?;
let register_pdeathsig = arguments.flag_present("no-outlive-parent");
register_signal_handlers(register_pdeathsig).map_err(MainError::RegisterSignalHandlers)?;

#[cfg(target_arch = "aarch64")]
enable_ssbd_mitigation();
Expand Down
2 changes: 2 additions & 0 deletions src/vmm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ pub enum FcExitCode {
SIGHUP = 156,
/// Firecracker was shut down after intercepting `SIGILL`.
SIGILL = 157,
/// Firecracker was shut down after intercepting `SIGUSR2` (parent process has died).
SIGUSR2 = 158,
/// Bad configuration for microvm's resources, when using a single json.
BadConfiguration = 152,
/// Command line arguments parsing error.
Expand Down
3 changes: 3 additions & 0 deletions src/vmm/src/logger/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,8 @@ pub struct SignalMetrics {
pub sighup: SharedStoreMetric,
/// Number of times that SIGILL was handled.
pub sigill: SharedStoreMetric,
/// Number of times that SIGUSR2 was handled.
pub sigusr2: SharedStoreMetric,
}
impl SignalMetrics {
/// Const default construction.
Expand All @@ -974,6 +976,7 @@ impl SignalMetrics {
sigpipe: SharedIncMetric::new(),
sighup: SharedStoreMetric::new(),
sigill: SharedStoreMetric::new(),
sigusr2: SharedStoreMetric::new(),
}
}
}
Expand Down
38 changes: 35 additions & 3 deletions src/vmm/src/signal_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
// SPDX-License-Identifier: Apache-2.0

use libc::{
c_int, c_void, siginfo_t, SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ,
c_int, c_void, prctl, siginfo_t, PR_SET_PDEATHSIG, SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV,
SIGSYS, SIGUSR2, SIGXCPU, SIGXFSZ,
};
use log::error;
use utils::signal::register_signal_handler;
Expand Down Expand Up @@ -74,6 +75,13 @@ fn log_sigsys_err(si_code: c_int, info: *mut siginfo_t) {
);
}

fn log_parent_death_signal(si_code: c_int, _info: *mut siginfo_t) {
error!(
"Shutting down VM after intercepting the parent death signal ({}).",
si_code
);
}

fn empty_fn(_si_code: c_int, _info: *mut siginfo_t) {}

generate_handler!(
Expand Down Expand Up @@ -123,6 +131,7 @@ generate_handler!(
METRICS.signals.sighup,
empty_fn
);

generate_handler!(
sigill_handler,
SIGILL,
Expand All @@ -131,6 +140,14 @@ generate_handler!(
empty_fn
);

generate_handler!(
sigusr2_handler,
SIGUSR2,
SIGUSR2,
METRICS.signals.sigusr2,
log_parent_death_signal
);

#[inline(always)]
extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) {
// Just record the metric and allow the process to continue, the EPIPE error needs
Expand All @@ -155,7 +172,7 @@ extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_
///
/// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS`
/// `SIGXFSZ` `SIGXCPU` `SIGPIPE` `SIGHUP` and `SIGILL`.
pub fn register_signal_handlers() -> utils::errno::Result<()> {
pub fn register_signal_handlers(register_pdeathsig: bool) -> utils::errno::Result<()> {
// Call to unsafe register_signal_handler which is considered unsafe because it will
// register a signal handler which will be called in the current thread and will interrupt
// whatever work is done on the current thread, so we have to keep in mind that the registered
Expand All @@ -168,6 +185,14 @@ pub fn register_signal_handlers() -> utils::errno::Result<()> {
register_signal_handler(SIGPIPE, sigpipe_handler)?;
register_signal_handler(SIGHUP, sighup_handler)?;
register_signal_handler(SIGILL, sigill_handler)?;

if register_pdeathsig {
register_signal_handler(SIGUSR2, sigusr2_handler)?;
unsafe {
let rc = libc::prctl(PR_SET_PDEATHSIG, SIGUSR2);
assert_eq!(rc, 0);
}
}
Ok(())
}

Expand All @@ -184,7 +209,7 @@ mod tests {
#[test]
fn test_signal_handler() {
let child = thread::spawn(move || {
assert!(register_signal_handlers().is_ok());
assert!(register_signal_handlers(true).is_ok());

let filter = make_test_seccomp_bpf_filter();

Expand Down Expand Up @@ -235,6 +260,12 @@ mod tests {
unsafe {
syscall(libc::SYS_kill, process::id(), SIGILL);
}

// Call SIGUSR2 signal handler.
assert_eq!(METRICS.signals.sigusr2.fetch(), 0);
unsafe {
syscall(libc::SYS_kill, process::id(), SIGUSR2);
}
});
assert!(child.join().is_ok());

Expand All @@ -246,6 +277,7 @@ mod tests {
assert!(METRICS.signals.sigpipe.count() >= 1);
assert!(METRICS.signals.sighup.fetch() >= 1);
assert!(METRICS.signals.sigill.fetch() >= 1);
assert!(METRICS.signals.sigusr2.fetch() >= 1);
}

fn make_test_seccomp_bpf_filter() -> Vec<sock_filter> {
Expand Down

0 comments on commit c1164ce

Please sign in to comment.