Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prototype support for macvtap interfaces #2217

Merged
merged 8 commits into from Mar 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
85 changes: 78 additions & 7 deletions src/devices/src/virtio/net/tap.rs
Expand Up @@ -5,13 +5,23 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use net_gen::ifreq;
use std::fs::File;
use std::fs::{File, OpenOptions};
use std::io::{Error as IoError, Read, Result as IoResult, Write};
use std::os::raw::*;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use utils::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val};
use utils::{ioctl_expr, ioctl_ioc_nr, ioctl_iow_nr};
use std::os::{
raw::*,
unix::{
fs::{FileTypeExt, OpenOptionsExt},
io::{AsRawFd, FromRawFd, RawFd},
},
};
use std::path::Path;

use net_gen::ifreq;
use utils::{
ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val},
ioctl_expr, ioctl_ioc_nr, ioctl_iow_nr,
net::macvtap::MacVTap,
};

// As defined in the Linux UAPI:
// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33
Expand All @@ -24,10 +34,16 @@ pub enum Error {
CreateTap(IoError),
/// Invalid interface name.
InvalidIfname,
/// Tap interface device is not a character device.
InvalidTapDevType,
/// ioctl failed.
IoctlError(IoError),
/// Unable to open tap interface device.
OpenTapDev(IoError),
/// Couldn't open /dev/net/tun.
OpenTun(IoError),
/// Unable to stat tap interface device for macvtap interface.
StatTapDev(IoError),
}

pub type Result<T> = ::std::result::Result<T, Error>;
Expand Down Expand Up @@ -100,11 +116,66 @@ impl IfReqBuilder {
}

impl Tap {
/// Create a TUN/TAP device given the interface name.
/// * `if_name` - the name of the interface.
/// Open the TUN/TAP device backing a tap or macvtap interface.
///
/// The interface name is first resolved through `MacVTap::get_device_node`.
/// When that lookup succeeds the interface is treated as a macvtap and its
/// device node is opened directly; when it fails the name is handed to the
/// regular tap path instead.
///
/// # Arguments
///
/// * `if_name` - the name of the interface.
pub fn open_named(if_name: &str) -> Result<Tap> {
    match MacVTap::get_device_node(if_name) {
        // macvtap: a dedicated device node exists for this interface, so
        // open that node rather than allocating through /dev/net/tun.
        Ok(dev_node) => Self::macvtap_open_named(if_name, &dev_node),
        // Any lookup failure means "not a macvtap": fall back to the plain
        // tap path (open /dev/net/tun and allocate via SETIFF).
        Err(_) => Self::tap_open_named(if_name),
    }
}

/// Create a TUN/TAP device given the macvtap interface name and device node.
/// # Arguments
///
/// * `if_name` - the name of the interface.
/// * `dev_path` - location of the interface's device node.
fn macvtap_open_named(if_name: &str, dev_path: &Path) -> Result<Tap> {
// Open the device node
let mut opts = OpenOptions::new();
let tap_file = opts
.read(true)
.write(true)
.custom_flags(libc::O_NONBLOCK | libc::O_CLOEXEC)
.open(dev_path)
.map_err(Error::OpenTapDev)?;

// Must be a char device
let md = tap_file.metadata().map_err(Error::StatTapDev)?;
if !md.file_type().is_char_device() {
return Err(Error::InvalidTapDevType);
}

// The length check is probably unnecessary because we know that the
// network interface is valid at this point, but it doesn't hurt.
let name_bytes = if_name.as_bytes();
if name_bytes.len() >= IFACE_NAME_MAX_LEN {
return Err(Error::InvalidIfname);
}

let mut ret = Tap {
tap_file,
if_name: [0; IFACE_NAME_MAX_LEN],
};

ret.if_name[..name_bytes.len()].copy_from_slice(name_bytes);
Ok(ret)
}

/// Create a TUN/TAP device given the tap interface name.
/// # Arguments
///
/// * `if_name` - the name of the interface.
fn tap_open_named(if_name: &str) -> Result<Tap> {
let terminated_if_name = build_terminated_if_name(if_name)?;

let fd = unsafe {
Expand Down
112 changes: 102 additions & 10 deletions src/jailer/src/env.rs
Expand Up @@ -9,11 +9,12 @@ use std::os::unix::process::CommandExt;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};

use crate::cgroup;
use crate::cgroup::Cgroup;
use crate::chroot::chroot;
use crate::{cgroup, to_cstring};
use crate::{Error, Result};
use utils::arg_parser::Error::MissingValue;
use utils::net::macvtap::MacVTap;
use utils::syscall::SyscallReturnCode;
use utils::{arg_parser, validators};

Expand Down Expand Up @@ -64,6 +65,7 @@ pub struct Env {
start_time_cpu_us: u64,
extra_args: Vec<String>,
cgroups: Vec<Cgroup>,
macvtaps: Vec<String>,
}

impl Env {
Expand Down Expand Up @@ -157,6 +159,15 @@ impl Env {
}
}

// macvtap arg format: --macvtap if_name => create device node /dev/net/if_name in the chroot.
let mut macvtaps = Vec::new();
// The MacVTaps are not created here, because join_netns must be called first.
if let Some(macvtap_args) = arguments.multiple_values("macvtap") {
for arg in macvtap_args {
macvtaps.push(arg.to_string());
}
}

Ok(Env {
id: id.to_owned(),
chroot_dir,
Expand All @@ -169,6 +180,7 @@ impl Env {
start_time_cpu_us,
extra_args: arguments.extra_args(),
cgroups,
macvtaps,
})
}

Expand Down Expand Up @@ -275,12 +287,57 @@ impl Env {
// namespace. Safe because we are passing valid parameters.
SyscallReturnCode(unsafe { libc::close(netns_fd) })
.into_empty_result()
.map_err(Error::CloseNetNsFd)
.map_err(Error::CloseNetNsFd)?;

// Since namespaces are shared by default when creating a new process using fork or clone,
// unshare() is used to disassociate (unshare) the current process from the mount namespace:
// https://linux.die.net/man/2/unshare.
SyscallReturnCode(unsafe { libc::unshare(libc::CLONE_NEWNS) })
.into_empty_result()
.map_err(Error::MountSysfs)?;

// It is not sufficient to join the new network namespace,
// we also need to mount a version of /sys that describes
// the network namespace.

// Don't let any mounts propagate back to the parent.
// This means that the sysfs of the network namespace will only
// get mounted for the jailer process.
SyscallReturnCode(unsafe {
libc::mount(
to_cstring("")?.as_ptr(),
to_cstring("/")?.as_ptr(),
to_cstring("none")?.as_ptr(),
libc::MS_SLAVE | libc::MS_REC,
std::ptr::null(),
)
})
.into_empty_result()
.map_err(Error::MountSysfs)?;

// Unmount the current sysfs since it's describing the previous namespace.
let cstr_sys = to_cstring(Path::new("/sys"))?;
SyscallReturnCode(unsafe { libc::umount2(cstr_sys.as_ptr(), libc::MNT_DETACH) })
.into_empty_result()
.map_err(Error::UmountSysfs)?;

// Actually mount the sysfs corresponding to the current namespace.
SyscallReturnCode(unsafe {
libc::mount(
std::ptr::null(),
cstr_sys.as_ptr(),
to_cstring("sysfs")?.as_ptr(),
0,
std::ptr::null(),
)
})
.into_empty_result()
.map_err(Error::MountSysfs)
}

#[cfg(target_arch = "aarch64")]
fn copy_cache_info(&self) -> Result<()> {
use crate::{readln_special, to_cstring, writeln_special};
use crate::{readln_special, writeln_special};

const HOST_CACHE_INFO: &str = "/sys/devices/system/cpu/cpu0/cache";
// Based on https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/kernel/cacheinfo.c#L29.
Expand Down Expand Up @@ -340,7 +397,7 @@ impl Env {

#[cfg(target_arch = "aarch64")]
fn copy_midr_el1_info(&self) -> Result<()> {
use crate::{readln_special, to_cstring, writeln_special};
use crate::{readln_special, writeln_special};

const HOST_MIDR_EL1_INFO: &str = "/sys/devices/system/cpu/cpu0/regs/identification";

Expand Down Expand Up @@ -368,11 +425,7 @@ impl Env {
pub fn run(mut self) -> Result<()> {
let exec_file_name = self.copy_exec_to_chroot()?;
let chroot_exec_file = PathBuf::from("/").join(&exec_file_name);

// Join the specified network namespace, if applicable.
if let Some(ref path) = self.netns {
Env::join_netns(path)?;
}
let mut macvtaps = Vec::new();

// We have to setup cgroups at this point, because we can't do it anymore after chrooting.
// cgroups are iterated two times as some cgroups may require others (e.g cpuset requires
Expand All @@ -387,6 +440,18 @@ impl Env {
cgroup.attach_pid().unwrap();
}

// Join the specified network namespace, if applicable.
if let Some(ref path) = self.netns {
Env::join_netns(path)?;
}

for macvtap in &self.macvtaps {
macvtaps.push(
MacVTap::by_name(&macvtap)
.map_err(|e| Error::MacVTapByName(macvtap.to_string(), e))?,
);
}

// If daemonization was requested, open /dev/null before chrooting.
let dev_null = if self.daemonize {
// Safe because we use a constant null-terminated string and verify the result.
Expand Down Expand Up @@ -429,6 +494,14 @@ impl Env {
// Do the same for /dev/kvm with (major, minor) = (10, 232).
self.mknod_and_own_dev(DEV_KVM_WITH_NUL, DEV_KVM_MAJOR, DEV_KVM_MINOR)?;

// Create requested macvtap devices inside the jailer.
for iface in &macvtaps {
let path = Path::new("/dev/net").join(&iface.if_name);
iface
.mknod(&path, self.uid, self.gid)
.map_err(|e| Error::MacVTapMknod(path, e))?
}

// Daemonize before exec, if so required (when the dev_null variable != None).
if let Some(fd) = dev_null {
// Call setsid(). Safe because it's a library function.
Expand Down Expand Up @@ -484,6 +557,7 @@ mod tests {
pub netns: Option<&'a str>,
pub daemonize: bool,
pub cgroups: Vec<&'a str>,
pub macvtaps: Vec<&'a str>,
}

impl ArgVals<'_> {
Expand All @@ -498,6 +572,7 @@ mod tests {
netns: Some("zzzns"),
daemonize: true,
cgroups: vec!["cpu.shares=2", "cpuset.mems=0"],
macvtaps: vec![],
}
}
}
Expand Down Expand Up @@ -528,6 +603,12 @@ mod tests {
arg_vec.push((*cg).to_string());
}

// Append macvtap arguments
for macvtap in &arg_vals.macvtaps {
arg_vec.push("--macvtap".to_string());
arg_vec.push((*macvtap).to_string());
}

if let Some(s) = arg_vals.netns {
arg_vec.push("--netns".to_string());
arg_vec.push(s.to_string());
Expand Down Expand Up @@ -581,7 +662,7 @@ mod tests {
let another_good_arg_vals = ArgVals {
netns: None,
daemonize: false,
..good_arg_vals
..good_arg_vals.clone()
};

let arg_parser = build_arg_parser();
Expand Down Expand Up @@ -657,6 +738,16 @@ mod tests {
args.parse(&make_args(&invalid_gid_arg_vals)).unwrap();
assert!(Env::new(&args, 0, 0).is_err());

let macvtap_args = ArgVals {
macvtaps: vec!["vtap1", "vtap0"],
..good_arg_vals.clone()
};

let arg_parser = build_arg_parser();
args = arg_parser.arguments().clone();
args.parse(&make_args(&macvtap_args)).unwrap();
assert!(Env::new(&args, 0, 0).is_ok());

// The chroot-base-dir param is not validated by Env::new, but rather in run, when we
// actually attempt to create the folder structure (the same goes for netns).
}
Expand Down Expand Up @@ -797,6 +888,7 @@ mod tests {
netns: Some("zzzns"),
daemonize: false,
cgroups: Vec::new(),
macvtaps: Vec::new(),
};
fs::write(some_file_path, "some_content").unwrap();
args.parse(&make_args(&some_arg_vals)).unwrap();
Expand Down