diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index bdb32ae75..57d96b6af 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -62,8 +62,8 @@ impl<'a> ContainerBuilderImpl<'a> { } // We use a set of channels to communicate between parent and child process. Each channel is uni-directional. - let parent_to_child = &mut channel::Channel::new()?; - let child_to_parent = &mut channel::Channel::new()?; + let (sender_to_intermediate, receiver_from_main) = &mut channel::main_to_intermediate()?; + let (sender_to_main, receiver_from_intermediate) = &mut channel::intermediate_to_main()?; // Need to create the notify socket before we pivot root, since the unix // domain socket used here is outside of the rootfs of container. During @@ -87,36 +87,37 @@ impl<'a> ContainerBuilderImpl<'a> { let intermediate_pid = fork::container_fork(|| { // The fds in the pipe is duplicated during fork, so we first close // the unused fds. Note, this already runs in the child process. - parent_to_child - .close_sender() + sender_to_intermediate + .close() .context("Failed to close unused sender")?; - child_to_parent - .close_receiver() + receiver_from_intermediate + .close() .context("Failed to close unused receiver")?; - init::container_intermidiate(init_args, parent_to_child, child_to_parent) + init::container_intermidiate(init_args, receiver_from_main, sender_to_main) })?; // Close down unused fds. The corresponding fds are duplicated to the // child process during fork. - parent_to_child - .close_receiver() + receiver_from_main + .close() .context("Failed to close parent to child receiver")?; - child_to_parent - .close_sender() + sender_to_main + .close() .context("Failed to close child to parent sender")?; + // If creating a rootless container, the intermediate process will ask // the main process to set up uid and gid mapping, once the intermediate // process enters into a new user namespace. if self.rootless.is_some() { - child_to_parent.wait_for_mapping_request()?; + receiver_from_intermediate.wait_for_mapping_request()?; log::debug!("write mapping for pid {:?}", intermediate_pid); utils::write_file(format!("/proc/{}/setgroups", intermediate_pid), "deny")?; rootless::write_uid_mapping(intermediate_pid, self.rootless.as_ref())?; rootless::write_gid_mapping(intermediate_pid, self.rootless.as_ref())?; - parent_to_child.send_mapping_written()?; + sender_to_intermediate.mapping_written()?; } - let init_pid = child_to_parent.wait_for_child_ready()?; + let init_pid = receiver_from_intermediate.wait_for_intermediate_ready()?; log::debug!("init pid is {:?}", init_pid); cmanager diff --git a/src/process/channel.rs b/src/process/channel.rs index 2751f18ed..0f09f0807 100644 --- a/src/process/channel.rs +++ b/src/process/channel.rs @@ -10,53 +10,58 @@ use std::io::Read; use std::io::Write; use std::os::unix::io::AsRawFd; -pub struct Channel { - sender: Sender, - receiver: Receiver, +trait SenderExt { + fn write_message(&mut self, msg: Message) -> Result<()>; } -impl Channel { - pub fn new() -> Result { - let (sender, receiver) = pipe::new()?; - // Our use case is for the process to wait for the communication to come - // through, so we set nonblocking to false here (double negative). It is - // expected that the waiting process will block and wait. - receiver - .set_nonblocking(false) - .with_context(|| "Failed to set channel receiver to blocking")?; - Ok(Self { sender, receiver }) - } - - pub fn send_child_ready(&mut self, pid: Pid) -> Result<()> { - // Send over the ChildReady follow by the pid. - log::debug!("sending init pid ({:?})", pid); - self.write_message(Message::ChildReady)?; - self.sender.write_all(&(pid.as_raw()).to_be_bytes())?; +impl SenderExt for Sender { + #[inline] + fn write_message(&mut self, msg: Message) -> Result<()> { + let bytes = (msg as u8).to_be_bytes(); + self.write_all(&bytes) + .with_context(|| format!("Failed to write message {:?} to the pipe", bytes))?; Ok(()) } +} - // requests the parent to write the id mappings for the child process - // this needs to be done from the parent see https://man7.org/linux/man-pages/man7/user_namespaces.7.html - pub fn send_identifier_mapping_request(&mut self) -> Result<()> { - log::debug!("send identifier mapping request"); - self.write_message(Message::WriteMapping)?; - Ok(()) - } +pub fn main_to_intermediate() -> Result<(SenderMainToIntermediate, ReceiverFromMain)> { + let (sender, receiver) = new_pipe()?; + Ok(( + SenderMainToIntermediate { sender }, + ReceiverFromMain { receiver }, + )) +} + +pub struct SenderMainToIntermediate { + sender: Sender, +} - pub fn send_mapping_written(&mut self) -> Result<()> { +impl SenderMainToIntermediate { + pub fn mapping_written(&mut self) -> Result<()> { log::debug!("identifier mapping written"); self.sender .write_all(&(Message::MappingWritten as u8).to_be_bytes())?; Ok(()) } + pub fn close(&self) -> Result<()> { + unistd::close(self.sender.as_raw_fd())?; + Ok(()) + } +} + +pub struct ReceiverFromMain { + receiver: Receiver, +} + +impl ReceiverFromMain { // wait until the parent process has finished writing the id mappings pub fn wait_for_mapping_ack(&mut self) -> Result<()> { log::debug!("waiting for mapping ack"); let mut buf = [0; 1]; self.receiver .read_exact(&mut buf) - .with_context(|| "Failed to receive a message from the child process.")?; + .with_context(|| "Failed to receive a message from the main process.")?; match Message::from(u8::from_be_bytes(buf)) { Message::MappingWritten => Ok(()), @@ -67,6 +72,52 @@ impl Channel { } } + pub fn close(&self) -> Result<()> { + unistd::close(self.receiver.as_raw_fd())?; + Ok(()) + } +} + +pub fn intermediate_to_main() -> Result<(SenderIntermediateToMain, ReceiverFromIntermediate)> { + let (sender, receiver) = new_pipe()?; + Ok(( + SenderIntermediateToMain { sender }, + ReceiverFromIntermediate { receiver }, + )) +} + +pub struct SenderIntermediateToMain { + sender: Sender, +} + +impl SenderIntermediateToMain { + // requests the Main to write the id mappings for the intermediate process + // this needs to be done from the parent see https://man7.org/linux/man-pages/man7/user_namespaces.7.html + pub fn identifier_mapping_request(&mut self) -> Result<()> { + log::debug!("send identifier mapping request"); + self.sender.write_message(Message::WriteMapping)?; + Ok(()) + } + + pub fn intermediate_ready(&mut self, pid: Pid) -> Result<()> { + // Send over the IntermediateReady follow by the pid. + log::debug!("sending init pid ({:?})", pid); + self.sender.write_message(Message::IntermediateReady)?; + self.sender.write_all(&(pid.as_raw()).to_be_bytes())?; + Ok(()) + } + + pub fn close(&self) -> Result<()> { + unistd::close(self.sender.as_raw_fd())?; + Ok(()) + } +} + +pub struct ReceiverFromIntermediate { + receiver: Receiver, +} + +impl ReceiverFromIntermediate { pub fn wait_for_mapping_request(&mut self) -> Result<()> { let mut buf = [0; 1]; self.receiver @@ -83,51 +134,101 @@ impl Channel { } } - /// Waits for associated child process to send ready message - /// and return the pid of init process which is forked by child process - pub fn wait_for_child_ready(&mut self) -> Result { + /// Waits for associated intermediate process to send ready message + /// and return the pid of init process which is forked by intermediate process + pub fn wait_for_intermediate_ready(&mut self) -> Result { let mut buf = [0; 1]; self.receiver .read_exact(&mut buf) - .with_context(|| "Failed to receive a message from the child process.")?; + .with_context(|| "Failed to receive a message from the intermediate process.")?; match Message::from(u8::from_be_bytes(buf)) { - Message::ChildReady => { - log::debug!("received child ready message"); + Message::IntermediateReady => { + log::debug!("received intermediate ready message"); // Read the Pid which will be i32 or 4 bytes. let mut buf = [0; 4]; - self.receiver - .read_exact(&mut buf) - .with_context(|| "Failed to receive a message from the child process.")?; + self.receiver.read_exact(&mut buf).with_context(|| { + "Failed to receive a message from the intermediate process." + })?; Ok(Pid::from_raw(i32::from_be_bytes(buf))) } msg => bail!( - "receive unexpected message {:?} waiting for child ready", + "receive unexpected message {:?} waiting for intermediate ready", msg ), } } - pub fn close_receiver(&self) -> Result<()> { + pub fn close(&self) -> Result<()> { unistd::close(self.receiver.as_raw_fd())?; + Ok(()) + } +} + +pub fn init_to_intermediate() -> Result<(SenderInitToIntermediate, ReceiverFromInit)> { + let (sender, receiver) = new_pipe()?; + Ok(( + SenderInitToIntermediate { sender }, + ReceiverFromInit { receiver }, + )) +} + +pub struct SenderInitToIntermediate { + sender: Sender, +} +impl SenderInitToIntermediate { + pub fn init_ready(&mut self) -> Result<()> { + self.sender.write_message(Message::InitReady)?; Ok(()) } - pub fn close_sender(&self) -> Result<()> { + pub fn close(&self) -> Result<()> { unistd::close(self.sender.as_raw_fd())?; - Ok(()) } +} - #[inline] - fn write_message(&mut self, msg: Message) -> Result<()> { - self.sender.write_all(&(msg as u8).to_be_bytes())?; +pub struct ReceiverFromInit { + receiver: Receiver, +} + +impl ReceiverFromInit { + /// Waits for associated init process to send ready message + /// and return the pid of init process which is forked by init process + pub fn wait_for_init_ready(&mut self) -> Result<()> { + let mut buf = [0; 1]; + self.receiver + .read_exact(&mut buf) + .with_context(|| "Failed to receive a message from the init process.")?; + + match Message::from(u8::from_be_bytes(buf)) { + Message::InitReady => Ok(()), + msg => bail!( + "receive unexpected message {:?} waiting for init ready", + msg + ), + } + } + + pub fn close(&self) -> Result<()> { + unistd::close(self.receiver.as_raw_fd())?; Ok(()) } } +fn new_pipe() -> Result<(Sender, Receiver)> { + let (sender, receiver) = pipe::new()?; + // Our use case is for the process to wait for the communication to come + // through, so we set nonblocking to false here (double negative). It is + // expected that the waiting process will block and wait. + receiver + .set_nonblocking(false) + .with_context(|| "Failed to set channel receiver to blocking")?; + Ok((sender, receiver)) +} + #[cfg(test)] mod tests { use super::*; @@ -136,20 +237,41 @@ mod tests { use nix::unistd; #[test] - fn test_channel_child_ready() -> Result<()> { - let ch = &mut Channel::new()?; + fn test_channel_intermadiate_ready() -> Result<()> { + let (sender, receiver) = &mut intermediate_to_main()?; match unsafe { unistd::fork()? } { unistd::ForkResult::Parent { child } => { - let pid = ch - .wait_for_child_ready() - .with_context(|| "Failed to wait for child ready")?; + let pid = receiver + .wait_for_intermediate_ready() + .with_context(|| "Failed to wait for intermadiate ready")?; assert_eq!(pid, child); wait::waitpid(child, None)?; } unistd::ForkResult::Child => { let pid = unistd::getpid(); - ch.send_child_ready(pid) - .with_context(|| "Failed to send child ready")?; + sender + .intermediate_ready(pid) + .with_context(|| "Failed to send intermadiate ready")?; + std::process::exit(0); + } + }; + Ok(()) + } + + #[test] + fn test_channel_id_mapping_request() -> Result<()> { + let (sender, receiver) = &mut intermediate_to_main()?; + match unsafe { unistd::fork()? } { + unistd::ForkResult::Parent { child } => { + receiver + .wait_for_mapping_request() + .with_context(|| "Failed to wait for mapping ack")?; + wait::waitpid(child, None)?; + } + unistd::ForkResult::Child => { + sender + .identifier_mapping_request() + .with_context(|| "Failed to send mapping written")?; std::process::exit(0); } }; @@ -158,16 +280,18 @@ mod tests { } #[test] - fn test_channel_id_mapping() -> Result<()> { - let ch = &mut Channel::new()?; + fn test_channel_id_mapping_ack() -> Result<()> { + let (sender, receiver) = &mut main_to_intermediate()?; match unsafe { unistd::fork()? } { unistd::ForkResult::Parent { child } => { - ch.wait_for_mapping_ack() + receiver + .wait_for_mapping_ack() .with_context(|| "Failed to wait for mapping ack")?; wait::waitpid(child, None)?; } unistd::ForkResult::Child => { - ch.send_mapping_written() + sender + .mapping_written() .with_context(|| "Failed to send mapping written")?; std::process::exit(0); } @@ -177,20 +301,62 @@ mod tests { } #[test] - fn test_channel_graceful_exit() -> Result<()> { - let ch = &mut Channel::new()?; + fn test_channel_init_ready() -> Result<()> { + let (sender, receiver) = &mut init_to_intermediate()?; + match unsafe { unistd::fork()? } { + unistd::ForkResult::Parent { child } => { + receiver + .wait_for_init_ready() + .with_context(|| "Failed to wait for init ready")?; + wait::waitpid(child, None)?; + } + unistd::ForkResult::Child => { + sender + .init_ready() + .with_context(|| "Failed to send init ready")?; + std::process::exit(0); + } + }; + Ok(()) + } + + #[test] + fn test_channel_intermedaite_graceful_exit() -> Result<()> { + let (sender, receiver) = &mut intermediate_to_main()?; + match unsafe { unistd::fork()? } { + unistd::ForkResult::Parent { child } => { + sender.close().context("Failed to close sender")?; + // The child process will exit without send the intermediate ready + // message. This should cause the wait_for_intermediate_ready to error + // out, instead of keep blocking. + let ret = receiver.wait_for_intermediate_ready(); + assert!(ret.is_err()); + wait::waitpid(child, None)?; + } + unistd::ForkResult::Child => { + receiver.close().context("Failed to close receiver")?; + std::process::exit(0); + } + }; + + Ok(()) + } + + #[test] + fn test_channel_init_graceful_exit() -> Result<()> { + let (sender, receiver) = &mut init_to_intermediate()?; match unsafe { unistd::fork()? } { unistd::ForkResult::Parent { child } => { - ch.close_sender().context("Failed to close sender")?; - // The child process will exit without send the child ready - // message. This should cause the wait_for_child_ready to error + sender.close().context("Failed to close sender")?; + // The child process will exit without send the init ready + // message. This should cause the wait_for_init_ready to error // out, instead of keep blocking. - let ret = ch.wait_for_child_ready(); + let ret = receiver.wait_for_init_ready(); assert!(ret.is_err()); wait::waitpid(child, None)?; } unistd::ForkResult::Child => { - ch.close_receiver().context("Failed to close receiver")?; + receiver.close().context("Failed to close receiver")?; std::process::exit(0); } }; diff --git a/src/process/init.rs b/src/process/init.rs index e79f1d1d5..a63db4d08 100644 --- a/src/process/init.rs +++ b/src/process/init.rs @@ -5,7 +5,7 @@ use nix::sched::CloneFlags; use nix::{ fcntl, sys::statfs, - unistd::{self, Gid, Pid, Uid}, + unistd::{self, Gid, Uid}, }; use oci_spec::{LinuxNamespaceType, Spec}; use std::collections::HashMap; @@ -167,8 +167,8 @@ pub struct ContainerInitArgs { pub fn container_intermidiate( args: ContainerInitArgs, - main_to_intermediate: &mut channel::Channel, - intermediate_to_main: &mut channel::Channel, + receiver_from_main: &mut channel::ReceiverFromMain, + sender_to_main: &mut channel::SenderIntermediateToMain, ) -> Result<()> { let command = &args.syscall; let spec = &args.spec; @@ -199,8 +199,8 @@ pub fn container_intermidiate( // child needs to be dumpable, otherwise the non root parent is not // allowed to write the uid/gid maps prctl::set_dumpable(true).unwrap(); - intermediate_to_main.send_identifier_mapping_request()?; - main_to_intermediate.wait_for_mapping_ack()?; + sender_to_main.identifier_mapping_request()?; + receiver_from_main.wait_for_mapping_ack()?; prctl::set_dumpable(false).unwrap(); } @@ -231,7 +231,8 @@ pub fn container_intermidiate( } // We only need for init process to send us the ChildReady. - let child_to_parent = &mut channel::Channel::new()?; + let (sender_to_intermediate, receiver_from_init) = &mut channel::init_to_intermediate()?; + // We resued the args passed in, but replace with a new set of channels. let init_args = ContainerInitArgs { ..args }; // We have to record the pid of the child (container init process), since @@ -239,24 +240,24 @@ pub fn container_intermidiate( // to send us the correct pid. let pid = fork::container_fork(|| { // First thing in the child process to close the unused fds in the channel/pipe. - child_to_parent - .close_receiver() + receiver_from_init + .close() .context("Failed to close receiver in init process")?; - container_init(init_args, child_to_parent) + container_init(init_args, sender_to_intermediate) })?; // Close unused fds in the parent process. - child_to_parent - .close_sender() + sender_to_intermediate + .close() .context("Failed to close sender in the intermediate process")?; // There is no point using the pid returned here, since the child will be // inside the pid namespace already. - child_to_parent - .wait_for_child_ready() + receiver_from_init + .wait_for_init_ready() .context("Failed to wait for the child")?; // After the child (the container init process) becomes ready, we can signal // the parent (the main process) that we are ready. - intermediate_to_main - .send_child_ready(pid) + sender_to_main + .intermediate_ready(pid) .context("Failed to send child ready from intermediate process")?; Ok(()) @@ -264,7 +265,7 @@ pub fn container_intermidiate( pub fn container_init( args: ContainerInitArgs, - init_to_intermediate: &mut channel::Channel, + sender_to_intermediate: &mut channel::SenderInitToIntermediate, ) -> Result<()> { let command = &args.syscall; let spec = &args.spec; @@ -431,7 +432,7 @@ pub fn container_init( // Note, we pass -1 here because we are already inside the pid namespace. // The pid outside the pid namespace should be recorded by the intermediate // process. - init_to_intermediate.send_child_ready(Pid::from_raw(-1))?; + sender_to_intermediate.init_ready()?; // listing on the notify socket for container start command let notify_socket = args.notify_socket; diff --git a/src/process/message.rs b/src/process/message.rs index 0c494437a..8a9cce9cc 100644 --- a/src/process/message.rs +++ b/src/process/message.rs @@ -1,17 +1,19 @@ /// Used as a wrapper for messages to be sent between child and parent processes #[derive(Debug)] pub enum Message { - ChildReady = 0x00, - WriteMapping = 0x01, - MappingWritten = 0x02, + IntermediateReady = 0x00, + InitReady = 0x01, + WriteMapping = 0x02, + MappingWritten = 0x03, } impl From for Message { fn from(from: u8) -> Self { match from { - 0x00 => Message::ChildReady, - 0x01 => Message::WriteMapping, - 0x02 => Message::MappingWritten, + 0x00 => Message::IntermediateReady, + 0x01 => Message::InitReady, + 0x02 => Message::WriteMapping, + 0x03 => Message::MappingWritten, _ => panic!("unknown message."), } }