Skip to content

Commit

Permalink
Initial checkpoint support
Browse files Browse the repository at this point in the history
This adds the first code to checkpoint a container. The checkpoint
command is named 'checkpointt' (with two 't's at the end) so that
container engines like Podman do not try to use this not-yet-finished
checkpoint/restore implementation.

For Podman it is still necessary to tell CRIU that the network namespace
is external at least and restoring needs special handling to support
'--console-socket'.

This currently uses the not officially released CRIU Rust bindings.

Signed-off-by: Adrian Reber <areber@redhat.com>
  • Loading branch information
adrianreber committed Jan 24, 2022
1 parent 03bbfd8 commit 5c4fca1
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 2 deletions.
36 changes: 36 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/libcontainer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ libcgroups = { version = "0.0.2", path = "../libcgroups" }
libseccomp = { version = "0.0.2", path = "../libseccomp" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
rust-criu = { git = "https://github.com/adrianreber/rust-criu" }
wasmer = { version = "2.1.0", optional = true }
wasmer-wasi = { version = "2.1.0", optional = true }

Expand Down
11 changes: 11 additions & 0 deletions crates/libcontainer/src/container/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,17 @@ impl Container {
}
}

/// Checkpoint parameter structure.
///
/// Bundles the options a caller can set when checkpointing a container.
/// The boolean flags are forwarded unchanged to the CRIU option of the
/// same name; the two paths select the directories handed to CRIU.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckpointOptions {
    /// Allow external unix sockets.
    pub ext_unix_sk: bool,
    /// Allow file locks.
    pub file_locks: bool,
    /// Directory in which the CRIU image files are saved.
    pub image_path: PathBuf,
    /// Leave the process running after checkpointing.
    pub leave_running: bool,
    /// Allow shell jobs.
    pub shell_job: bool,
    /// Allow open (established) TCP connections.
    pub tcp_established: bool,
    /// Optional directory for CRIU work files and logs.
    pub work_path: Option<PathBuf>,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
89 changes: 89 additions & 0 deletions crates/libcontainer/src/container/container_checkpoint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use super::{Container, ContainerStatus};
use crate::container::container::CheckpointOptions;
use anyhow::{bail, Context, Result};

use oci_spec::runtime::Spec;
use std::os::unix::io::AsRawFd;

const CRIU_CHECKPOINT_LOG_FILE: &str = "dump.log";

impl Container {
pub fn checkpoint(&mut self, opts: &CheckpointOptions) -> Result<()> {
self.refresh_status()
.context("failed to refresh container status")?;

// can_pause() checks if the container is running. That also works for
// checkpoitning. is_running() would make more sense here, but let's
// just reuse existing functions.
if !self.can_pause() {
bail!(
"{} could not be checkpointed because it was {:?}",
self.id(),
self.status()
);
}

let mut criu = rust_criu::Criu::new().unwrap();

// We need to tell CRIU that all bind mounts are external. CRIU will fail checkpointing
// if it does not know that these bind mounts are coming from the outside of the container.
// This information is needed during restore again. The external location of the bind
// mounts can change and CRIU will just mount whatever we tell it to mount based on
// information found in 'config.json'.
let source_spec_path = self.bundle().join("config.json");
let spec = Spec::load(&source_spec_path)?;
let mounts = spec.mounts().clone();
for m in mounts.unwrap() {
if m.typ() == &Some("bind".to_string()) {
let dest = m
.destination()
.clone()
.into_os_string()
.into_string()
.unwrap();
criu.set_external_mount(dest.clone(), dest);
}
}

let directory = std::fs::File::open(&opts.image_path)?;
criu.set_images_dir_fd(directory.as_raw_fd());

// It seems to be necessary to be defined outside of 'if' to
// keep the FD open until CRIU uses it.
let work_dir: std::fs::File;
if let Some(wp) = &opts.work_path {
work_dir = std::fs::File::open(wp)?;
criu.set_work_dir_fd(work_dir.as_raw_fd());
}

criu.set_log_file(CRIU_CHECKPOINT_LOG_FILE.to_string());
criu.set_log_level(4);
criu.set_pid(self.pid().unwrap().into());
criu.set_leave_running(opts.leave_running);
criu.set_ext_unix_sk(opts.ext_unix_sk);
criu.set_shell_job(opts.shell_job);
criu.set_tcp_established(opts.tcp_established);
criu.set_file_locks(opts.file_locks);
criu.set_orphan_pts_master(true);
criu.set_manage_cgroups(true);
if let Err(e) = criu.dump() {
bail!(
"Checkpointing container {} failed with {:?}. Please check CRIU logfile {:?}/{}",
self.id(),
e,
match &opts.work_path {
Some(wp) => wp,
_ => &opts.image_path,
},
CRIU_CHECKPOINT_LOG_FILE
);
}

if !opts.leave_running {
self.set_status(ContainerStatus::Stopped).save()?;
}

log::debug!("container {} checkpointed", self.id());
Ok(())
}
}
2 changes: 2 additions & 0 deletions crates/libcontainer/src/container/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod builder;
mod builder_impl;
#[allow(clippy::module_inception)]
mod container;
mod container_checkpoint;
mod container_delete;
mod container_events;
mod container_kill;
Expand All @@ -17,5 +18,6 @@ mod container_start;
pub mod init_builder;
pub mod state;
pub mod tenant_builder;
pub use container::CheckpointOptions;
pub use container::Container;
pub use state::{ContainerProcessState, ContainerStatus, State};
30 changes: 30 additions & 0 deletions crates/liboci-cli/src/checkpoint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use clap::Parser;
use std::path::PathBuf;

// NOTE: clap's derive macro turns the `///` doc comments in this struct into
// the help text shown to the user, so they are part of the CLI output.
// Maintainer notes belong in plain `//` comments like this one.
/// Checkpoint a running container
#[derive(Parser, Debug)]
pub struct Checkpoint {
// Positional argument (no `long`); it is required and must not be empty.
#[clap(forbid_empty_values = true, required = true)]
pub container_id: String,
/// allow external unix sockets
#[clap(long)]
pub ext_unix_sk: bool,
/// allow file locks
#[clap(long)]
pub file_locks: bool,
/// path for saving criu image files
// Falls back to the relative path "checkpoint" when the flag is omitted.
#[clap(long, default_value = "checkpoint")]
pub image_path: PathBuf,
/// leave the process running after checkpointing
#[clap(long)]
pub leave_running: bool,
/// allow shell jobs
#[clap(long)]
pub shell_job: bool,
/// allow open tcp connections
#[clap(long)]
pub tcp_established: bool,
/// path for saving work files and logs
// Optional: `None` when the flag is not given.
#[clap(long)]
pub work_path: Option<PathBuf>,
}
6 changes: 4 additions & 2 deletions crates/liboci-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod state;
pub use {create::Create, delete::Delete, kill::Kill, start::Start, state::State};

// Other common subcommands that aren't specified in the document
mod checkpoint;
mod events;
mod exec;
mod list;
Expand All @@ -25,8 +26,8 @@ mod spec;
mod update;

pub use {
events::Events, exec::Exec, list::List, pause::Pause, ps::Ps, resume::Resume, run::Run,
spec::Spec, update::Update,
checkpoint::Checkpoint, events::Events, exec::Exec, list::List, pause::Pause, ps::Ps,
resume::Resume, run::Run, spec::Spec, update::Update,
};

// Subcommands parsed by liboci-cli, based on the [OCI
Expand All @@ -48,6 +49,7 @@ pub enum StandardCmd {
// and other runtimes.
#[derive(Parser, Debug)]
pub enum CommonCmd {
Checkpointt(Checkpoint),
Events(Events),
Exec(Exec),
List(List),
Expand Down
24 changes: 24 additions & 0 deletions crates/youki/src/commands/checkpoint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//! Contains functionality of checkpoint container command
use crate::commands::load_container;
use std::path::PathBuf;

use anyhow::{Context, Result};

use liboci_cli::Checkpoint;

pub fn checkpoint(args: Checkpoint, root_path: PathBuf) -> Result<()> {
log::debug!("start checkpointing container {}", args.container_id);
let mut container = load_container(root_path, &args.container_id)?;
let opts = libcontainer::container::CheckpointOptions {
ext_unix_sk: args.ext_unix_sk,
file_locks: args.file_locks,
image_path: args.image_path,
leave_running: args.leave_running,
shell_job: args.shell_job,
tcp_established: args.tcp_established,
work_path: args.work_path,
};
container
.checkpoint(&opts)
.with_context(|| format!("failed to checkpoint container {}", args.container_id))
}
1 change: 1 addition & 0 deletions crates/youki/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{
use libcgroups::common::CgroupManager;
use libcontainer::container::Container;

pub mod checkpoint;
pub mod completion;
pub mod create;
pub mod delete;
Expand Down
3 changes: 3 additions & 0 deletions crates/youki/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ fn main() -> Result<()> {
StandardCmd::State(state) => commands::state::state(state, root_path),
},
SubCommand::Common(cmd) => match cmd {
CommonCmd::Checkpointt(checkpoint) => {
commands::checkpoint::checkpoint(checkpoint, root_path)
}
CommonCmd::Events(events) => commands::events::events(events, root_path),
CommonCmd::Exec(exec) => commands::exec::exec(exec, root_path),
CommonCmd::List(list) => commands::list::list(list, root_path),
Expand Down

0 comments on commit 5c4fca1

Please sign in to comment.