Skip to content

Commit

Permalink
Introduce seccomp feature for libcontainer with musl (#1484)
Browse files Browse the repository at this point in the history
* Introduce seccomp feature for libcontainer with musl

Signed-off-by: Kris Nóva <kris@nivenly.com>

* remove mystery merge conflict

Signed-off-by: Eric Fang <yihuaf@unkies.org>

---------

Signed-off-by: Kris Nóva <kris@nivenly.com>
Signed-off-by: Eric Fang <yihuaf@unkies.org>
Co-authored-by: Eric Fang <yihuaf@unkies.org>
  • Loading branch information
krisnova and yihuaf committed Mar 23, 2023
1 parent 55d8368 commit bef32e0
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 27 deletions.
5 changes: 3 additions & 2 deletions crates/libcontainer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ rust-version = "1.58.1"
keywords = ["youki", "container", "cgroups"]

[features]
default = ["systemd", "v2", "v1"]
default = ["systemd", "v2", "v1", "libseccomp"]
libseccomp = ["dep:libseccomp"]
systemd = ["libcgroups/systemd", "v2"]
v2 = ["libcgroups/v2"]
v1 = ["libcgroups/v1"]
Expand All @@ -35,7 +36,7 @@ oci-spec = { version = "^0.6.0", features = ["runtime"] }
procfs = "0.15.1"
prctl = "1.0.0"
libcgroups = { version = "0.0.4", path = "../libcgroups", default-features = false }
libseccomp = { version = "0.3.0" }
libseccomp = { version = "0.3.0", optional=true }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
syscalls = "0.6.8"
Expand Down
21 changes: 21 additions & 0 deletions crates/libcontainer/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,22 @@
# libcontainer

### Building with musl

In order to build with musl you must first remove the libseccomp dependency as it will reference shared libraries (`libdbus` and `libseccomp`) which cannot be built with musl.

Do this by adding flags to Cargo: pass the `--no-default-features` flag, followed by `-F` and whichever features you intend to build with (such as `v2`), as defined in the `[features]` section of Cargo.toml.

Next you will also need the `+nightly` flags when building with `rustup` and `cargo`.

```bash
# Add rustup +nightly musl to toolchain
rustup +nightly target add $(uname -m)-unknown-linux-musl

# Build rustup +nightly stdlib with musl
rustup +nightly toolchain install nightly-$(uname -m)-unknown-linux-musl

# Build musl standard library
cargo +nightly build -Zbuild-std --target $(uname -m)-unknown-linux-musl --no-default-features -F v2

cargo +nightly build --target $(uname -m)-unknown-linux-musl --no-default-features -F v2
```
4 changes: 3 additions & 1 deletion crates/libcontainer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ pub mod notify_socket;
pub mod process;
pub mod rootfs;
pub mod rootless;
pub mod seccomp;
pub mod signal;
pub mod syscall;
pub mod tty;
pub mod utils;
pub mod workload;

#[cfg(feature = "libseccomp")]
pub mod seccomp;
24 changes: 22 additions & 2 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ use crate::apparmor;
use crate::syscall::Syscall;
use crate::{
capabilities, hooks, namespaces::Namespaces, process::channel, rootfs::RootFS,
rootless::Rootless, seccomp, tty, utils,
rootless::Rootless, tty, utils,
};
use anyhow::{bail, Context, Ok, Result};
use nix::mount::MsFlags;
use nix::sched::CloneFlags;
use nix::sys::stat::Mode;
use nix::unistd::setsid;

use nix::unistd::{self, Gid, Uid};
use oci_spec::runtime::{LinuxNamespaceType, Spec, User};
use std::collections::HashMap;
Expand All @@ -20,6 +19,12 @@ use std::{
path::{Path, PathBuf},
};

#[cfg(feature = "libseccomp")]
use crate::seccomp;

#[cfg(not(feature = "libseccomp"))]
use log::warn;

fn sysctl(kernel_params: &HashMap<String, String>) -> Result<()> {
let sys = PathBuf::from("/proc/sys");
for (kernel_param, value) in kernel_params {
Expand Down Expand Up @@ -158,6 +163,7 @@ fn reopen_dev_null() -> Result<()> {
Ok(())
}

#[allow(unused_variables)]
pub fn container_init_process(
args: &ContainerArgs,
main_sender: &mut channel::MainSender,
Expand Down Expand Up @@ -348,6 +354,7 @@ pub fn container_init_process(
// Without no new privileges, seccomp is a privileged operation. We have to
// do this before dropping capabilities. Otherwise, we should do it later,
// as close to exec as possible.
#[cfg(feature = "libseccomp")]
if let Some(seccomp) = linux.seccomp() {
if proc.no_new_privileges().is_none() {
let notify_fd =
Expand All @@ -356,6 +363,10 @@ pub fn container_init_process(
.context("failed to sync seccomp")?;
}
}
#[cfg(not(feature = "libseccomp"))]
if proc.no_new_privileges().is_none() {
warn!("seccomp not available, unable to enforce no_new_privileges!")
}

capabilities::reset_effective(syscall).context("Failed to reset effective capabilities")?;
if let Some(caps) = proc.capabilities() {
Expand Down Expand Up @@ -384,6 +395,7 @@ pub fn container_init_process(
// Initialize seccomp profile right before we are ready to execute the
// payload so as few syscalls will happen between here and payload exec. The
// notify socket will still need network related syscalls.
#[cfg(feature = "libseccomp")]
if let Some(seccomp) = linux.seccomp() {
if proc.no_new_privileges().is_some() {
let notify_fd =
Expand All @@ -392,6 +404,10 @@ pub fn container_init_process(
.context("failed to sync seccomp")?;
}
}
#[cfg(not(feature = "libseccomp"))]
if proc.no_new_privileges().is_some() {
warn!("seccomp not available, unable to set seccomp privileges!")
}

// this checks if the binary to run actually exists and if we have permissions to run it.
// Taken from https://github.com/opencontainers/runc/blob/25c9e888686773e7e06429133578038a9abc091d/libcontainer/standard_init_linux.go#L195-L206
Expand Down Expand Up @@ -514,6 +530,7 @@ fn set_supplementary_gids(
Ok(())
}

#[cfg(feature = "libseccomp")]
fn sync_seccomp(
fd: Option<i32>,
main_sender: &mut channel::MainSender,
Expand All @@ -539,8 +556,10 @@ mod tests {
syscall::create_syscall,
test::{ArgName, MountArgs, TestHelperSyscall},
};
#[cfg(feature = "libseccomp")]
use nix::unistd;
use oci_spec::runtime::{LinuxNamespaceBuilder, SpecBuilder, UserBuilder};
#[cfg(feature = "libseccomp")]
use serial_test::serial;
use std::fs;

Expand Down Expand Up @@ -675,6 +694,7 @@ mod tests {

#[test]
#[serial]
#[cfg(feature = "libseccomp")]
fn test_sync_seccomp() -> Result<()> {
use std::os::unix::io::IntoRawFd;
use std::thread;
Expand Down
28 changes: 19 additions & 9 deletions crates/libcontainer/src/process/container_main_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@ use crate::{
container::ContainerProcessState,
process::{args::ContainerArgs, channel, container_intermediate_process, fork},
rootless::Rootless,
seccomp, utils,
utils,
};
use anyhow::{Context, Result};
use nix::sys::wait::{waitpid, WaitStatus};
use nix::unistd::Pid;

#[cfg(feature = "libseccomp")]
use crate::seccomp;
#[cfg(feature = "libseccomp")]
use nix::{
sys::{
socket::{self, UnixAddr},
wait::{waitpid, WaitStatus},
},
unistd::{self, Pid},
sys::socket::{self, UnixAddr},
unistd::{self},
};
#[cfg(feature = "libseccomp")]
use oci_spec::runtime;
#[cfg(feature = "libseccomp")]
use std::{io::IoSlice, path::Path};

pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
Expand Down Expand Up @@ -66,6 +71,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {

if let Some(linux) = container_args.spec.linux() {
if let Some(seccomp) = linux.seccomp() {
#[allow(unused_variables)]
let state = ContainerProcessState {
oci_version: container_args.spec.version().to_string(),
// runc hardcode the `seccompFd` name for fds.
Expand All @@ -79,6 +85,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
.state
.clone(),
};
#[cfg(feature = "libseccomp")]
sync_seccomp(seccomp, &state, init_sender, main_receiver)
.context("failed to sync seccomp with init")?;
}
Expand Down Expand Up @@ -114,6 +121,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
Ok(init_pid)
}

#[cfg(feature = "libseccomp")]
fn sync_seccomp(
seccomp: &runtime::LinuxSeccomp,
state: &ContainerProcessState,
Expand Down Expand Up @@ -141,6 +149,7 @@ fn sync_seccomp(
Ok(())
}

#[cfg(feature = "libseccomp")]
fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<()> {
// The seccomp listener has specific instructions on how to transmit the
// information through seccomp listener. Therefore, we have to use
Expand Down Expand Up @@ -197,9 +206,9 @@ mod tests {
sched::{unshare, CloneFlags},
unistd::{self, getgid, getuid},
};
use oci_spec::runtime::{
LinuxIdMappingBuilder, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder,
};
use oci_spec::runtime::LinuxIdMappingBuilder;
#[cfg(feature = "libseccomp")]
use oci_spec::runtime::{LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder};
use serial_test::serial;
use std::fs;

Expand Down Expand Up @@ -320,6 +329,7 @@ mod tests {

#[test]
#[serial]
#[cfg(feature = "libseccomp")]
fn test_sync_seccomp() -> Result<()> {
use std::io::Read;
use std::os::unix::io::IntoRawFd;
Expand Down
30 changes: 18 additions & 12 deletions crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
//! Implements Command trait for Linux systems
use std::ffi::{CStr, CString, OsStr};
use std::fs;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::symlink;
use std::os::unix::io::RawFd;
use std::str::FromStr;
use std::sync::Arc;
use std::{any::Any, mem, path::Path, ptr};

use anyhow::{anyhow, bail, Context, Error, Result};
use caps::{CapSet, CapsHashSet};
use libc::{c_char, setdomainname, uid_t};
Expand All @@ -21,9 +12,16 @@ use nix::{
unistd,
unistd::{chown, fchdir, pivot_root, setgroups, sethostname, Gid, Uid},
};
use syscalls::{syscall, Sysno, Sysno::close_range};

use oci_spec::runtime::LinuxRlimit;
use std::ffi::{CStr, CString, OsStr};
use std::fs;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::symlink;
use std::os::unix::io::RawFd;
use std::str::FromStr;
use std::sync::Arc;
use std::{any::Any, mem, path::Path, ptr};
use syscalls::{syscall, Sysno, Sysno::close_range};

use super::Syscall;
use crate::syscall::syscall::CloseRange;
Expand Down Expand Up @@ -374,7 +372,13 @@ impl Syscall for LinuxSyscall {
rlim_cur: rlimit.soft(),
rlim_max: rlimit.hard(),
};

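// musl and non-musl libc targets need a different integer type for the
// resource argument of `setrlimit` (i32 on musl, u32 otherwise), so the
// cast is selected per target environment. This is required for the
// musl (no-libseccomp) build.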
#[cfg(not(target_env = "musl"))]
let res = unsafe { libc::setrlimit(rlimit.typ() as u32, rlim) };
#[cfg(target_env = "musl")]
let res = unsafe { libc::setrlimit(rlimit.typ() as i32, rlim) };

if let Err(e) = Errno::result(res).map(drop) {
bail!("Failed to set {:?}. {:?}", rlimit.typ(), e)
}
Expand Down Expand Up @@ -472,7 +476,6 @@ impl Syscall for LinuxSyscall {
CloseRange::CLOEXEC.bits()
)
};

match result {
Ok(_) => Ok(()),
Err(e) if e == syscalls::Errno::ENOSYS || e == syscalls::Errno::EINVAL => {
Expand Down Expand Up @@ -500,6 +503,9 @@ impl Syscall for LinuxSyscall {
};
let result = unsafe {
// TODO: nix/libc crate hasn't supported mount_setattr system call yet.
// TODO: @krisnova migrate all youki to libc::SYS_mount_setattr
// https://docs.rs/libc/0.2.139/libc/constant.SYS_mount_setattr.html
// https://docs.rs/libc/0.2.139/libc/fn.syscall.html
syscall!(
Sysno::mount_setattr,
dirfd,
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ pub(crate) mod test_utils {
message: String,
}

#[allow(dead_code)]
pub fn test_in_child_process<F: FnOnce() -> Result<()>>(cb: F) -> Result<()> {
let (mut sender, mut receiver) = channel::channel::<TestResult>()?;
match unsafe { nix::unistd::fork()? } {
Expand Down
15 changes: 14 additions & 1 deletion scripts/features_test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/bin/bash

set -eu

# Build the different features individually
Expand All @@ -16,4 +15,18 @@ cargo test --no-default-features -F systemd
cargo test --no-default-features -F v2 -F cgroupsv2_devices
cargo test --no-default-features -F systemd -F cgroupsv2_devices

# Build with musl: libcontainer
cargo +nightly build \
-Zbuild-std \
--target $(uname -m)-unknown-linux-musl \
--package libcontainer \
--no-default-features -F v2

# Test with musl: libcontainer
cargo +nightly test \
-Zbuild-std \
--target $(uname -m)-unknown-linux-musl \
--package libcontainer \
--no-default-features -F v2

exit 0

0 comments on commit bef32e0

Please sign in to comment.