diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6550009..e29dc437 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -114,9 +114,6 @@ jobs: name: "Privileged testing" needs: build runs-on: ubuntu-latest - container: - image: quay.io/fedora/fedora-coreos:testing-devel - options: "--privileged --pid=host -v /run/systemd:/run/systemd -v /:/run/host" steps: - name: Checkout repository uses: actions/checkout@v3 @@ -125,7 +122,7 @@ jobs: with: name: bootc - name: Install - run: install bootc /usr/bin && rm -v bootc + run: sudo install bootc /usr/bin && rm -v bootc - name: Integration tests - run: bootc internal-tests run-privileged-integration + run: sudo podman run --rm -ti --privileged -v /run/systemd:/run/systemd -v /:/run/host -v /usr/bin/bootc:/usr/bin/bootc --pid=host quay.io/fedora/fedora-coreos:testing-devel bootc internal-tests run-privileged-integration diff --git a/.gitignore b/.gitignore index 72355c7c..86ed9b35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ example - +.cosa +_kola_temp +bootc.tar.zst # Added by cargo - /target Cargo.lock -bootc.tar.zst diff --git a/Makefile b/Makefile index 7a0edf42..947c5cbb 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ bin-archive: all $(MAKE) install DESTDIR=tmp-install && tar --zstd -C tmp-install -cf bootc.tar.zst . 
&& rm tmp-install -rf install-kola-tests: - install -D -t $(DESTDIR)$(prefix)/lib/coreos-assembler/tests/kola/bootc tests/kolainst/basic + install -D -t $(DESTDIR)$(prefix)/lib/coreos-assembler/tests/kola/bootc tests/kolainst/* vendor: cargo xtask $@ diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8473ed11..0264d09f 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -15,15 +15,23 @@ ostree-ext = "0.10.5" clap = { version= "3.2", features = ["derive"] } clap_mangen = { version = "0.1", optional = true } cap-std-ext = "1.0.1" +hex = "^0.4" fn-error-context = "0.2.0" +gvariant = "0.4.0" indicatif = "0.17.0" +libc = "^0.2" +once_cell = "1.9" +openssl = "^0.10" +nix = ">= 0.24, < 0.26" serde = { features = ["derive"], version = "1.0.125" } serde_json = "1.0.64" +serde_with = ">= 1.9.4, < 2" tokio = { features = ["io-std", "time", "process", "rt", "net"], version = ">= 1.13.0" } tokio-util = { features = ["io-util"], version = "0.7" } tracing = "0.1" tempfile = "3.3.0" xshell = { version = "0.2", optional = true } +uuid = { version = "1.2.2", features = ["v4"] } [features] default = [] diff --git a/lib/src/blockdev.rs b/lib/src/blockdev.rs new file mode 100644 index 00000000..80d99436 --- /dev/null +++ b/lib/src/blockdev.rs @@ -0,0 +1,162 @@ +use crate::task::Task; +use crate::utils::run_in_host_mountns; +use anyhow::{anyhow, Context, Result}; +use camino::Utf8Path; +use fn_error_context::context; +use nix::errno::Errno; +use serde::Deserialize; +use std::fs::File; +use std::os::unix::io::AsRawFd; +use std::process::Command; + +#[derive(Debug, Deserialize)] +struct DevicesOutput { + blockdevices: Vec, +} + +#[allow(dead_code)] +#[derive(Debug, Deserialize)] +pub(crate) struct Device { + pub(crate) name: String, + pub(crate) serial: Option, + pub(crate) model: Option, + pub(crate) label: Option, + pub(crate) fstype: Option, + pub(crate) children: Option>, +} + +impl Device { + #[allow(dead_code)] + // RHEL8's lsblk doesn't have PATH, so we do it + pub(crate) fn 
path(&self) -> String { + format!("/dev/{}", &self.name) + } + + pub(crate) fn has_children(&self) -> bool { + self.children.as_ref().map_or(false, |v| !v.is_empty()) + } +} + +pub(crate) fn wipefs(dev: &Utf8Path) -> Result<()> { + Task::new_and_run( + &format!("Wiping device {dev}"), + "wipefs", + ["-a", dev.as_str()], + ) +} + +fn list_impl(dev: Option<&Utf8Path>) -> Result> { + let o = Command::new("lsblk") + .args(["-J", "-o", "NAME,SERIAL,MODEL,LABEL,FSTYPE"]) + .args(dev) + .output()?; + if !o.status.success() { + return Err(anyhow::anyhow!("Failed to list block devices")); + } + let devs: DevicesOutput = serde_json::from_reader(&*o.stdout)?; + Ok(devs.blockdevices) +} + +#[context("Listing device {dev}")] +pub(crate) fn list_dev(dev: &Utf8Path) -> Result { + let devices = list_impl(Some(dev))?; + devices + .into_iter() + .next() + .ok_or_else(|| anyhow!("no device output from lsblk for {dev}")) +} + +#[allow(dead_code)] +pub(crate) fn list() -> Result> { + list_impl(None) +} + +pub(crate) fn udev_settle() -> Result<()> { + // There's a potential window after rereading the partition table where + // udevd hasn't yet received updates from the kernel, settle will return + // immediately, and lsblk won't pick up partition labels. Try to sleep + // our way out of this. + std::thread::sleep(std::time::Duration::from_millis(200)); + + let st = run_in_host_mountns("udevadm").arg("settle").status()?; + if !st.success() { + anyhow::bail!("Failed to run udevadm settle: {st:?}"); + } + Ok(()) +} + +#[allow(unsafe_code)] +pub(crate) fn reread_partition_table(file: &mut File, retry: bool) -> Result<()> { + let fd = file.as_raw_fd(); + // Reread sometimes fails inexplicably. Retry several times before + // giving up. 
+    let max_tries = if retry { 20 } else { 1 };
+    for retries in (0..max_tries).rev() {
+        let result = unsafe { ioctl::blkrrpart(fd) };
+        match result {
+            Ok(_) => break,
+            Err(err) if retries == 0 && err == Errno::EINVAL => {
+                return Err(err)
+                    .context("couldn't reread partition table: device may not support partitions")
+            }
+            Err(err) if retries == 0 && err == Errno::EBUSY => {
+                return Err(err).context("couldn't reread partition table: device is in use")
+            }
+            Err(err) if retries == 0 => return Err(err).context("couldn't reread partition table"),
+            Err(_) => std::thread::sleep(std::time::Duration::from_millis(100)),
+        }
+    }
+    Ok(())
+}
+
+// create unsafe ioctl wrappers
+#[allow(clippy::missing_safety_doc)]
+mod ioctl {
+    use libc::c_int;
+    use nix::{ioctl_none, ioctl_read, ioctl_read_bad, libc, request_code_none};
+    ioctl_none!(blkrrpart, 0x12, 95);
+    ioctl_read_bad!(blksszget, request_code_none!(0x12, 104), c_int);
+    ioctl_read!(blkgetsize64, 0x12, 114, libc::size_t);
+}
+
+/// Parse a size string into mebibytes.
+///
+/// A bare integer is taken as a number of mebibytes. At most one unit suffix
+/// may follow the number: `M`/`MiB`, `G`/`GiB` or `T`/`TiB`.
+///
+/// # Errors
+///
+/// Returns an error if text follows the suffix, if the remaining numeric part
+/// is not a valid `u64` (this is also how stacked suffixes such as `1GM` are
+/// rejected), or if scaling the value to mebibytes overflows `u64`.
+pub(crate) fn parse_size_mib(mut s: &str) -> Result<u64> {
+    // (suffix, multiplier that converts the unit to MiB)
+    let suffixes = [
+        ("MiB", 1u64),
+        ("M", 1u64),
+        ("GiB", 1024),
+        ("G", 1024),
+        ("TiB", 1024 * 1024),
+        ("T", 1024 * 1024),
+    ];
+    let mut mul = 1u64;
+    for (suffix, imul) in suffixes {
+        if let Some((sv, rest)) = s.rsplit_once(suffix) {
+            if !rest.is_empty() {
+                anyhow::bail!("Trailing text after size: {rest}");
+            }
+            s = sv;
+            mul = imul;
+            // Stop at the first suffix: continuing the scan would strip a
+            // second one, so that e.g. `1GM` was silently read as 1024.
+            break;
+        }
+    }
+    let v = s.parse::<u64>()?;
+    // The value comes from the command line; refuse to wrap around on huge inputs.
+    v.checked_mul(mul)
+        .ok_or_else(|| anyhow!("Size {v} with multiplier {mul} overflows u64"))
+}
+
+#[test]
+fn test_parse_size_mib() {
+    let ident_cases = [0, 10, 9, 1024].into_iter().map(|k| (k.to_string(), k));
+    let cases = [
+        ("0M", 0),
+        ("10M", 10),
+        ("10MiB", 10),
+        ("1G", 1024),
+        ("9G", 9216),
+        ("11T", 11 * 1024 * 1024),
+    ]
+    .into_iter()
+    .map(|(k, v)| (k.to_string(), v));
+    for (s, v) in ident_cases.chain(cases) {
+        assert_eq!(parse_size_mib(&s).unwrap(), v as u64, "Parsing {s}");
+    }
+    // Empty input, stacked suffixes, trailing text and overflow must all be rejected.
+    for bad in ["", "1GM", "1MG", "10x", "18446744073709551615T"] {
+        assert!(parse_size_mib(bad).is_err(), "Parsing {bad:?}");
+    }
+}
diff --git a/lib/src/bootloader.rs b/lib/src/bootloader.rs
new file mode 100644
index 00000000..c2f9d609
--- /dev/null +++ b/lib/src/bootloader.rs @@ -0,0 +1,105 @@ +use std::os::unix::prelude::PermissionsExt; + +use anyhow::{Context, Result}; +use camino::Utf8Path; +use cap_std::fs::Dir; +use cap_std::fs::Permissions; +use cap_std_ext::cap_std; +use cap_std_ext::prelude::*; +use fn_error_context::context; + +use crate::task::Task; + +/// This variable is referenced by our GRUB fragment +pub(crate) const IGNITION_VARIABLE: &str = "$ignition_firstboot"; +const GRUB_BOOT_UUID_FILE: &str = "bootuuid.cfg"; +const STATIC_GRUB_CFG: &str = include_str!("grub.cfg"); +const STATIC_GRUB_CFG_EFI: &str = include_str!("grub-efi.cfg"); + +fn install_grub2_efi(efidir: &Dir, uuid: &str) -> Result<()> { + let mut vendordir = None; + let efidir = efidir.open_dir("EFI").context("Opening EFI/")?; + for child in efidir.entries()? { + let child = child?; + let name = child.file_name(); + let name = if let Some(name) = name.to_str() { + name + } else { + continue; + }; + if name == "BOOT" { + continue; + } + if !child.file_type()?.is_dir() { + continue; + } + vendordir = Some(child.open_dir()?); + break; + } + let vendordir = vendordir.ok_or_else(|| anyhow::anyhow!("Failed to find EFI vendor dir"))?; + vendordir + .atomic_write("grub.cfg", STATIC_GRUB_CFG_EFI) + .context("Writing static EFI grub.cfg")?; + vendordir + .atomic_write(GRUB_BOOT_UUID_FILE, uuid) + .with_context(|| format!("Writing {GRUB_BOOT_UUID_FILE}"))?; + + Ok(()) +} + +#[context("Installing bootloader")] +pub(crate) fn install_via_bootupd( + device: &Utf8Path, + rootfs: &Utf8Path, + boot_uuid: &uuid::Uuid, +) -> Result<()> { + Task::new_and_run( + "Running bootupctl to install bootloader", + "bootupctl", + ["backend", "install", "--src-root", "/", rootfs.as_str()], + )?; + + let grub2_uuid_contents = format!("set BOOT_UUID=\"{boot_uuid}\"\n"); + + let bootfs = &rootfs.join("boot"); + + { + let efidir = Dir::open_ambient_dir(&bootfs.join("efi"), cap_std::ambient_authority())?; + install_grub2_efi(&efidir, 
&grub2_uuid_contents)?; + } + + let grub2 = &bootfs.join("grub2"); + std::fs::create_dir(grub2).context("creating boot/grub2")?; + let grub2 = Dir::open_ambient_dir(grub2, cap_std::ambient_authority())?; + // Mode 0700 to support passwords etc. + grub2.set_permissions(".", Permissions::from_mode(0o700))?; + grub2 + .atomic_write_with_perms( + "grub.cfg", + STATIC_GRUB_CFG, + cap_std::fs::Permissions::from_mode(0o600), + ) + .context("Writing grub.cfg")?; + + grub2 + .atomic_write_with_perms( + GRUB_BOOT_UUID_FILE, + grub2_uuid_contents, + Permissions::from_mode(0o644), + ) + .with_context(|| format!("Writing {GRUB_BOOT_UUID_FILE}"))?; + + Task::new("Installing BIOS grub2", "grub2-install") + .args([ + "--target", + "i386-pc", + "--boot-directory", + bootfs.as_str(), + "--modules", + "mdraid1x", + device.as_str(), + ]) + .run()?; + + Ok(()) +} diff --git a/lib/src/cli.rs b/lib/src/cli.rs index 42fcafec..403a43a4 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -94,6 +94,8 @@ pub(crate) enum Opt { Switch(SwitchOpts), /// Display status Status(StatusOpts), + /// Install to the target block device + Install(crate::install::InstallOpts), /// Internal integration testing helpers. #[clap(hide(true), subcommand)] #[cfg(feature = "internal-testing-api")] @@ -210,7 +212,9 @@ async fn stage( #[context("Preparing for write")] async fn prepare_for_write() -> Result<()> { ensure_self_unshared_mount_namespace().await?; - ostree_ext::selinux::verify_install_domain()?; + if crate::lsm::selinux_enabled()? 
{ + crate::lsm::selinux_ensure_install()?; + } Ok(()) } @@ -319,6 +323,7 @@ where match opt { Opt::Upgrade(opts) => upgrade(opts).await, Opt::Switch(opts) => switch(opts).await, + Opt::Install(opts) => crate::install::install(opts).await, Opt::Status(opts) => super::status::status(opts).await, #[cfg(feature = "internal-testing-api")] Opt::InternalTests(ref opts) => { diff --git a/lib/src/containerenv.rs b/lib/src/containerenv.rs new file mode 100644 index 00000000..485bd0aa --- /dev/null +++ b/lib/src/containerenv.rs @@ -0,0 +1,47 @@ +//! Helpers for parsing the `/run/.containerenv` file generated by podman. + +use std::fs::File; +use std::io::{BufRead, BufReader}; + +use anyhow::{Context, Result}; +use fn_error_context::context; + +const PATH: &str = "/run/.containerenv"; + +#[derive(Debug, Default)] +pub(crate) struct ContainerExecutionInfo { + pub(crate) engine: String, + pub(crate) name: String, + pub(crate) id: String, + pub(crate) image: String, + pub(crate) imageid: String, +} + +/// Load and parse the `/run/.containerenv` file. 
+#[context("Parsing {PATH}")] +pub(crate) fn get_container_execution_info() -> Result { + let f = File::open(PATH) + .with_context(|| format!("Opening {PATH}")) + .map(BufReader::new)?; + let mut r = ContainerExecutionInfo::default(); + for line in f.lines() { + let line = line?; + let line = line.trim(); + let (k, v) = if let Some(v) = line.split_once('=') { + v + } else { + continue; + }; + // Assuming there's no quotes here + let v = v.trim_start_matches('"').trim_end_matches('"'); + match k { + "engine" => r.engine = v.to_string(), + "name" => r.name = v.to_string(), + "id" => r.id = v.to_string(), + "image" => r.image = v.to_string(), + "imageid" => r.imageid = v.to_string(), + _ => {} + } + } + Ok(r) +} diff --git a/lib/src/grub-efi.cfg b/lib/src/grub-efi.cfg new file mode 100644 index 00000000..08ddae10 --- /dev/null +++ b/lib/src/grub-efi.cfg @@ -0,0 +1,18 @@ +if [ -e (md/md-boot) ]; then + # The search command might pick a RAID component rather than the RAID, + # since the /boot RAID currently uses superblock 1.0. See the comment in + # the main grub.cfg. + set prefix=md/md-boot +else + if [ -f ${config_directory}/bootuuid.cfg ]; then + source ${config_directory}/bootuuid.cfg + fi + if [ -n "${BOOT_UUID}" ]; then + search --fs-uuid "${BOOT_UUID}" --set prefix --no-floppy + else + search --label boot --set prefix --no-floppy + fi +fi +set prefix=($prefix)/grub2 +configfile $prefix/grub.cfg +boot diff --git a/lib/src/grub.cfg b/lib/src/grub.cfg new file mode 100644 index 00000000..f2188f48 --- /dev/null +++ b/lib/src/grub.cfg @@ -0,0 +1,95 @@ +# This file is copied from https://github.com/coreos/coreos-assembler/blob/main/src/grub.cfg +set pager=1 +# petitboot doesn't support -e and doesn't support an empty path part +if [ -d (md/md-boot)/grub2 ]; then + # fcct currently creates /boot RAID with superblock 1.0, which allows + # component partitions to be read directly as filesystems. 
This is + # necessary because transposefs doesn't yet rerun grub2-install on BIOS, + # so GRUB still expects /boot to be a partition on the first disk. + # + # There are two consequences: + # 1. On BIOS and UEFI, the search command might pick an individual RAID + # component, but we want it to use the full RAID in case there are bad + # sectors etc. The undocumented --hint option is supposed to support + # this sort of override, but it doesn't seem to work, so we set $boot + # directly. + # 2. On BIOS, the "normal" module has already been loaded from an + # individual RAID component, and $prefix still points there. We want + # future module loads to come from the RAID, so we reset $prefix. + # (On UEFI, the stub grub.cfg has already set $prefix properly.) + set boot=md/md-boot + set prefix=($boot)/grub2 +else + if [ -f ${config_directory}/bootuuid.cfg ]; then + source ${config_directory}/bootuuid.cfg + fi + if [ -n "${BOOT_UUID}" ]; then + search --fs-uuid "${BOOT_UUID}" --set boot --no-floppy + else + search --label boot --set boot --no-floppy + fi +fi +set root=$boot + +if [ -f ${config_directory}/grubenv ]; then + load_env -f ${config_directory}/grubenv +elif [ -s $prefix/grubenv ]; then + load_env +fi + +if [ x"${feature_menuentry_id}" = xy ]; then + menuentry_id_option="--id" +else + menuentry_id_option="" +fi + +function load_video { + if [ x$feature_all_video_module = xy ]; then + insmod all_video + else + insmod efi_gop + insmod efi_uga + insmod ieee1275_fb + insmod vbe + insmod vga + insmod video_bochs + insmod video_cirrus + fi +} + +# Any non-default console settings will be inserted here. +# CONSOLE-SETTINGS-START +# CONSOLE-SETTINGS-END + +if [ x$feature_timeout_style = xy ] ; then + set timeout_style=menu + set timeout=1 +# Fallback normal timeout code in case the timeout_style feature is +# unavailable. 
+else + set timeout=1 +fi + +# Determine if this is a first boot and set the ${ignition_firstboot} variable +# which is used in the kernel command line. +set ignition_firstboot="" +if [ -f "/ignition.firstboot" ]; then + # Default networking parameters to be used with ignition. + set ignition_network_kcmdline='' + + # Source in the `ignition.firstboot` file which could override the + # above $ignition_network_kcmdline with static networking config. + # This override feature is also used by coreos-installer to persist static + # networking config provided during install to the first boot of the machine. + source "/ignition.firstboot" + + set ignition_firstboot="ignition.firstboot ${ignition_network_kcmdline}" +fi + +# Import user defined configuration +# tracker: https://github.com/coreos/fedora-coreos-tracker/issues/805 +if [ -f $prefix/user.cfg ]; then + source $prefix/user.cfg +fi + +blscfg diff --git a/lib/src/ignition.rs b/lib/src/ignition.rs new file mode 100644 index 00000000..70937508 --- /dev/null +++ b/lib/src/ignition.rs @@ -0,0 +1,321 @@ +// Copyright 2019 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use anyhow::{bail, ensure, Context, Error, Result};
+use camino::Utf8Path;
+use fn_error_context::context;
+use openssl::hash::{Hasher, MessageDigest};
+use openssl::sha;
+use serde::{Deserialize, Serialize};
+use serde_with::{DeserializeFromStr, SerializeDisplay};
+use std::fmt;
+use std::fs;
+use std::fs::{File, OpenOptions};
+use std::io::{self, Read, Seek, Write};
+use std::os::unix::io::AsRawFd;
+use std::os::unix::prelude::PermissionsExt;
+use std::path::Path;
+use std::str::FromStr;
+
+/// The name of the file read by our bootloader config
+const FIRSTBOOT: &str = "ignition.firstboot";
+/// Kernel argument injected to signal we're on bare metal
+pub(crate) const PLATFORM_METAL_KARG: &str = "ignition.platform.id=metal";
+
+/// Ignition-style message digests.
+///
+/// Each variant carries the decoded digest bytes (not the hex string); the
+/// textual form is `<type>-<hex digest>`, as produced by `Display` and
+/// accepted by `FromStr`.
+#[derive(Debug, Clone, DeserializeFromStr, SerializeDisplay, PartialEq, Eq)]
+pub enum IgnitionHash {
+    /// SHA-256 digest.
+    Sha256(Vec),
+    /// SHA-512 digest.
+    Sha512(Vec),
+}
+
+/// Digest implementation. Helpfully, each digest in openssl::sha has a
+/// different type, so this enum wraps whichever hasher is in use.
+enum IgnitionHasher {
+    Sha256(sha::Sha256),
+    Sha512(sha::Sha512),
+}
+
+impl FromStr for IgnitionHash {
+    type Err = Error;
+
+    /// Try to parse a hash-digest argument.
+    ///
+    /// This expects an input value following the `ignition.config.verification.hash`
+    /// spec, i.e. `<type>-<value>` format.
+ fn from_str(input: &str) -> Result { + let parts: Vec<_> = input.splitn(2, '-').collect(); + if parts.len() != 2 { + bail!("failed to detect hash-type and digest in '{}'", input); + } + let (hash_kind, hex_digest) = (parts[0], parts[1]); + + let hash = match hash_kind { + "sha256" => { + let digest = hex::decode(hex_digest).context("decoding hex digest")?; + ensure!( + digest.len().saturating_mul(8) == 256, + "wrong digest length ({})", + digest.len().saturating_mul(8) + ); + IgnitionHash::Sha256(digest) + } + "sha512" => { + let digest = hex::decode(hex_digest).context("decoding hex digest")?; + ensure!( + digest.len().saturating_mul(8) == 512, + "wrong digest length ({})", + digest.len().saturating_mul(8) + ); + IgnitionHash::Sha512(digest) + } + x => bail!("unknown hash type '{}'", x), + }; + + Ok(hash) + } +} + +impl fmt::Display for IgnitionHash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (kind, value) = match self { + Self::Sha256(v) => ("sha256", v), + Self::Sha512(v) => ("sha512", v), + }; + write!(f, "{}-{}", kind, hex::encode(value)) + } +} + +impl IgnitionHash { + /// Digest and validate input data. 
+ pub fn validate(&self, input: &mut impl Read) -> Result<()> { + let (mut hasher, digest) = match self { + IgnitionHash::Sha256(val) => (IgnitionHasher::Sha256(sha::Sha256::new()), val), + IgnitionHash::Sha512(val) => (IgnitionHasher::Sha512(sha::Sha512::new()), val), + }; + let mut buf = [0u8; 128 * 1024]; + loop { + match input.read(&mut buf) { + Ok(0) => break, + Ok(n) => match hasher { + IgnitionHasher::Sha256(ref mut h) => h.update(&buf[..n]), + IgnitionHasher::Sha512(ref mut h) => h.update(&buf[..n]), + }, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e).context("reading input"), + }; + } + let computed = match hasher { + IgnitionHasher::Sha256(h) => h.finish().to_vec(), + IgnitionHasher::Sha512(h) => h.finish().to_vec(), + }; + + if &computed != digest { + bail!( + "hash mismatch, computed '{}' but expected '{}'", + hex::encode(computed), + hex::encode(digest), + ); + } + + Ok(()) + } +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +pub struct Sha256Digest(pub [u8; 32]); + +impl TryFrom for Sha256Digest { + type Error = Error; + + fn try_from(mut hasher: Hasher) -> std::result::Result { + let digest = hasher.finish().context("finishing hash")?; + Ok(Sha256Digest( + digest.as_ref().try_into().context("converting to SHA256")?, + )) + } +} + +impl Sha256Digest { + /// Calculates the SHA256 of a file. + #[allow(dead_code)] + pub(crate) fn from_path(path: &Path) -> Result { + let mut f = OpenOptions::new() + .read(true) + .open(path) + .with_context(|| format!("opening {:?}", path))?; + + Self::from_file(&mut f) + } + + /// Calculates the SHA256 of an opened file. Note that the underlying file descriptor will have + /// `posix_fadvise` called on it to optimize for sequential reading. 
+ #[allow(unsafe_code)] + pub fn from_file(f: &mut std::fs::File) -> Result { + // tell kernel to optimize for sequential reading + if unsafe { libc::posix_fadvise(f.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL) } < 0 { + eprintln!( + "posix_fadvise(SEQUENTIAL) failed (errno {}) -- ignoring...", + nix::errno::errno() + ); + } + + Self::from_reader(f) + } + + /// Calculates the SHA256 of a reader. + pub fn from_reader(r: &mut impl Read) -> Result { + let mut hasher = Hasher::new(MessageDigest::sha256()).context("creating SHA256 hasher")?; + std::io::copy(r, &mut hasher)?; + hasher.try_into() + } + + #[allow(dead_code)] + pub(crate) fn to_hex_string(&self) -> Result { + let mut buf: Vec = Vec::with_capacity(64); + for i in 0..32 { + write!(buf, "{:02x}", self.0[i])?; + } + Ok(String::from_utf8(buf)?) + } +} + +pub struct WriteHasher { + writer: W, + hasher: Hasher, +} + +impl WriteHasher { + #[allow(dead_code)] + pub fn new(writer: W, hasher: Hasher) -> Self { + WriteHasher { writer, hasher } + } + + #[allow(dead_code)] + pub fn new_sha256(writer: W) -> Result { + let hasher = Hasher::new(MessageDigest::sha256()).context("creating SHA256 hasher")?; + Ok(WriteHasher { writer, hasher }) + } +} + +impl Write for WriteHasher { + fn write(&mut self, buf: &[u8]) -> io::Result { + if buf.is_empty() { + return Ok(0); + } + + let n = self.writer.write(buf)?; + self.hasher.write_all(&buf[..n])?; + + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.writer.flush()?; + self.hasher.flush()?; + Ok(()) + } +} + +impl TryFrom> for Sha256Digest { + type Error = Error; + + fn try_from(wrapper: WriteHasher) -> std::result::Result { + Sha256Digest::try_from(wrapper.hasher) + } +} + +/// Write the Ignition config. +#[context("Writing ignition")] +pub(crate) fn write_ignition( + mountpoint: &Utf8Path, + digest_in: &Option, + mut config_in: &File, +) -> Result<()> { + // Verify configuration digest, if any. 
+ if let Some(digest) = &digest_in { + digest + .validate(&mut config_in) + .context("failed to validate Ignition configuration digest")?; + config_in + .seek(io::SeekFrom::Start(0)) + .context("rewinding Ignition configuration file")?; + }; + + // make parent directory + let mut config_dest = mountpoint.to_path_buf(); + config_dest.push("ignition"); + if !config_dest.is_dir() { + fs::create_dir_all(&config_dest) + .with_context(|| format!("creating Ignition config directory {config_dest}"))?; + // Ignition data may contain secrets; restrict to root + fs::set_permissions(&config_dest, fs::Permissions::from_mode(0o700)) + .with_context(|| format!("setting file mode for Ignition directory {config_dest}"))?; + } + + // do the copy + config_dest.push("config.ign"); + let mut config_out = OpenOptions::new() + .write(true) + .create_new(true) + .open(&config_dest) + .with_context(|| format!("opening destination Ignition config {config_dest}"))?; + // Ignition config may contain secrets; restrict to root + fs::set_permissions(&config_dest, fs::Permissions::from_mode(0o600)).with_context(|| { + format!("setting file mode for destination Ignition config {config_dest}") + })?; + io::copy(&mut config_in, &mut config_out).context("writing Ignition config")?; + + Ok(()) +} + +/// Enable Ignition to run on the next boot +#[context("Enabling Ignition firstboot")] +pub(crate) fn enable_firstboot(mountpoint: &Utf8Path) -> Result<()> { + fs::write(mountpoint.join(FIRSTBOOT), b"").map_err(Into::into) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ignition_hash_cli_parse() { + let err_cases = vec!["", "foo-bar", "-bar", "sha512", "sha512-", "sha512-00"]; + for arg in err_cases { + IgnitionHash::from_str(arg).expect_err(&format!("input: {}", arg)); + } + + let null_digest = "sha512-cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; + IgnitionHash::from_str(null_digest).unwrap(); + } + 
+ #[test] + fn test_ignition_hash_validate() { + let input = vec![b'a', b'b', b'c']; + let hash_args = [ + (true, "sha256-ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"), + (true, "sha512-ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"), + (false, "sha256-aa7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"), + (false, "sha512-cdaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f") + ]; + for (valid, hash_arg) in &hash_args { + let hasher = IgnitionHash::from_str(&hash_arg).unwrap(); + let mut rd = std::io::Cursor::new(&input); + assert!(hasher.validate(&mut rd).is_ok() == *valid); + } + } +} diff --git a/lib/src/install.rs b/lib/src/install.rs new file mode 100644 index 00000000..22c299f3 --- /dev/null +++ b/lib/src/install.rs @@ -0,0 +1,770 @@ +use std::fmt::Display; +use std::process::Command; +use std::process::Stdio; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use camino::Utf8Path; +use camino::Utf8PathBuf; +use cap_std_ext::cap_std; +use clap::ArgEnum; +use fn_error_context::context; +use ostree::gio; +use ostree_ext::container as ostree_container; +use ostree_ext::container::SignatureSource; +use ostree_ext::ostree; +use ostree_ext::prelude::Cast; +use serde::Serialize; + +use crate::containerenv::ContainerExecutionInfo; +use crate::lsm::lsm_label; +use crate::task::Task; +use crate::utils::run_in_host_mountns; + +/// The default "stateroot" or "osname"; see https://github.com/ostreedev/ostree/issues/2794 +const STATEROOT_DEFAULT: &str = "default"; + +/// Directory for transient runtime state +const RUN_BOOTC: &str = "/run/bootc"; + +#[derive(clap::ValueEnum, Debug, Copy, Clone, PartialEq, Eq)] +pub(crate) enum BlockSetup { + Direct, + Tpm2Luks, +} + +impl Default for BlockSetup { + fn default() -> Self { + Self::Direct + } +} + +#[derive(clap::ValueEnum, 
Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum Filesystem {
+    Xfs,
+    Ext4,
+    Btrfs,
+}
+
+impl Default for Filesystem {
+    fn default() -> Self {
+        // Obviously this should be configurable.
+        Self::Xfs
+    }
+}
+
+impl Display for Filesystem {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.to_possible_value().unwrap().get_name().fmt(f)
+    }
+}
+
+/// Kernel argument used to specify we want the rootfs mounted read-write by default
+const RW_KARG: &str = "rw";
+
+const BOOTPN: u32 = 3;
+// This ensures we end up under 512 to be small-sized.
+const BOOTPN_SIZE_MB: u32 = 510;
+const ROOTPN: u32 = 4;
+// TODO calculate from ostree commit
+const ROOTFS_SIZE_MB: u32 = 5 * 1024;
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+const EFIPN: u32 = 2;
+#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+const EFIPN_SIZE_MB: u32 = 512;
+#[cfg(target_arch = "aarch64")]
+const RESERVEDPN: u32 = 1;
+#[cfg(target_arch = "ppc64")]
+const PREPPN: u32 = 1;
+#[cfg(target_arch = "ppc64")]
+const RESERVEDPN: u32 = 1;
+
+/// Install to the target block device
+#[derive(Debug, Clone, clap::Parser)]
+pub(crate) struct InstallOpts {
+    /// Target block device for installation.  The entire device will be wiped.
+    pub(crate) device: Utf8PathBuf,
+
+    /// Automatically wipe all existing data on device
+    #[clap(long)]
+    pub(crate) wipe: bool,
+
+    /// Size of the root partition (default specifier: M).  Allowed specifiers: M (mebibytes), G (gibibytes), T (tebibytes)
+    #[clap(long)]
+    pub(crate) root_size: Option,
+
+    /// The transport; e.g. oci, oci-archive.  Defaults to `registry`.
+    #[clap(long, default_value = "registry")]
+    pub(crate) target_transport: String,
+
+    /// Specify the image to fetch for subsequent updates
+    #[clap(long)]
+    pub(crate) target_imgref: Option,
+
+    /// Explicitly opt-out of requiring any form of signature verification.
+    #[clap(long)]
+    pub(crate) target_no_signature_verification: bool,
+
+    /// Enable verification via an ostree remote
+    #[clap(long)]
+    pub(crate) target_ostree_remote: Option,
+
+    /// Target root filesystem type.
+    #[clap(long, value_enum, default_value_t)]
+    pub(crate) filesystem: Filesystem,
+
+    /// Path to an Ignition config file
+    #[clap(long, value_parser)]
+    pub(crate) ignition_file: Option,
+
+    /// Digest (type-value) of the Ignition config
+    ///
+    /// Verify that the Ignition config matches the specified digest,
+    /// formatted as <type>-<value>.  <type> can be sha256 or sha512.
+    #[clap(long, value_name = "digest", value_parser)]
+    pub(crate) ignition_hash: Option,
+
+    /// Target root block device setup.
+    ///
+    /// direct: Filesystem written directly to block device
+    /// tpm2-luks: Bind unlock of filesystem to presence of the default tpm2 device.
+    #[clap(long, value_enum, default_value_t)]
+    pub(crate) block_setup: BlockSetup,
+
+    /// Disable SELinux in the target (installed) system.
+    ///
+    /// This is currently necessary to install *from* a system with SELinux disabled
+    /// but where the target does have SELinux enabled.
+    #[clap(long)]
+    pub(crate) disable_selinux: bool,
+
+    // Only occupy at most this much space (if no units are provided, GB is assumed).
+    // Using this option reserves space for partitions created dynamically on the
+    // next boot, or by subsequent tools.
+    // pub(crate) size: Option,
+    #[clap(long)]
+    /// Add a kernel argument
+    karg: Option>,
+}
+
+// Shared read-only global state
+struct State {
+    opts: InstallOpts,
+    /// Path to our devtmpfs
+    devdir: Utf8PathBuf,
+    mntdir: Utf8PathBuf,
+}
+
+/// Path to initially deployed version information
+const BOOTC_ALEPH_PATH: &str = ".bootc-aleph.json";
+
+/// The "aleph" version information is injected into /root/.bootc-aleph.json
+/// and contains the image ID that was initially used to install.
This can
+/// be used to trace things like the specific version of `mkfs.ext4` or
+/// kernel version that was used.
+#[derive(Debug, Serialize)]
+struct InstallAleph {
+    /// Digested pull spec for installed image
+    image: String,
+    kernel: String,
+}
+
+/// Append the `sgdisk` arguments that describe partition number `n`.
+///
+/// Adds `-n <n>:<part>` and `-c <n>:<name>`, plus `-t <n>:<typecode>` when a
+/// type code is given. This only extends the argument list of `sgdisk`;
+/// nothing is executed here.
+fn sgdisk_partition(
+    sgdisk: &mut Command,
+    n: u32,
+    part: impl AsRef,
+    name: impl AsRef,
+    typecode: Option<&str>,
+) {
+    sgdisk.arg("-n");
+    sgdisk.arg(format!("{n}:{}", part.as_ref()));
+    sgdisk.arg("-c");
+    sgdisk.arg(format!("{n}:{}", name.as_ref()));
+    if let Some(typecode) = typecode {
+        sgdisk.arg("-t");
+        sgdisk.arg(format!("{n}:{typecode}"));
+    }
+}
+
+/// Run `mkfs.<fs>` on `dev` and return the UUID given to the new filesystem.
+///
+/// A fresh v4 UUID is generated here and handed to the mkfs tool, so the
+/// caller knows the filesystem UUID without probing the device afterwards.
+/// `label`, when present, is passed with `-L`; `opts` are appended as-is
+/// before the device path. Fails if the mkfs task fails.
+fn mkfs<'a>(
+    dev: &str,
+    fs: Filesystem,
+    label: Option<&'_ str>,
+    opts: impl IntoIterator,
+) -> Result {
+    let u = uuid::Uuid::new_v4();
+    let mut t = Task::new("Creating filesystem", &format!("mkfs.{fs}"));
+    // The option used to set the UUID differs between the mkfs tools.
+    match fs {
+        Filesystem::Xfs => {
+            t.cmd.arg("-m");
+            t.cmd.arg(format!("uuid={u}"));
+        }
+        Filesystem::Btrfs | Filesystem::Ext4 => {
+            t.cmd.arg("-U");
+            t.cmd.arg(u.to_string());
+        }
+    };
+    // Today all the above mkfs commands take -L
+    if let Some(label) = label {
+        t.cmd.args(["-L", label]);
+    }
+    t.cmd.args(opts);
+    t.cmd.arg(dev);
+    // All the mkfs commands are unnecessarily noisy by default
+    t.cmd.stdout(Stdio::null());
+    t.run()?;
+    Ok(u)
+}
+
+/// Mount `dev` at `target` by running the `mount` command as a task.
+fn mount(dev: &str, target: &Utf8Path) -> Result<()> {
+    Task::new_and_run(
+        &format!("Mounting {target}"),
+        "mount",
+        [dev, target.as_str()],
+    )
+}
+
+fn bind_mount_from_host(src: impl AsRef, dest: impl AsRef) -> Result<()> {
+    let src = src.as_ref();
+    let dest = dest.as_ref();
+    tracing::debug!("Mounting host {src} to {dest}");
+    std::fs::create_dir_all(dest).with_context(|| format!("Creating {dest}"))?;
+    // Here's the magic trick; modern versions of the `mount` command support a `-N` argument
+    // to perform the mount in a distinct target namespace.  But, what we want is the inverse
+    // of this - we want to grab a host/root filesystem mount point.
So we explicitly enter + // the host's mount namespace, then give `mount` our own pid (from which it finds the mount namespace). + let desc = format!("Bind mounting {src} from host"); + let target = format!("{}", nix::unistd::getpid()); + Task::new_cmd(&desc, run_in_host_mountns("mount")) + .quiet() + .args(["--bind", "-N", target.as_str(), src.as_str(), dest.as_str()]) + .run() +} + +#[context("Creating ostree deployment")] +async fn initialize_ostree_root_from_self( + state: &State, + containerstate: &ContainerExecutionInfo, + rootfs: &Utf8Path, + kargs: &[&str], +) -> Result { + let opts = &state.opts; + let cancellable = gio::Cancellable::NONE; + + if !containerstate.engine.starts_with("podman") { + anyhow::bail!("Currently this command only supports being executed via podman"); + } + if containerstate.imageid.is_empty() { + anyhow::bail!("Invalid empty imageid"); + } + let digest = crate::podman::imageid_to_digest(&containerstate.imageid)?; + let src_image = crate::utils::digested_pullspec(&containerstate.image, &digest); + + let src_imageref = ostree_container::OstreeImageReference { + sigverify: ostree_container::SignatureSource::ContainerPolicyAllowInsecure, + imgref: ostree_container::ImageReference { + transport: ostree_container::Transport::ContainerStorage, + name: src_image.clone(), + }, + }; + + // Parse the target CLI image reference options + let target_sigverify = if opts.target_no_signature_verification { + SignatureSource::ContainerPolicyAllowInsecure + } else if let Some(remote) = opts.target_ostree_remote.as_deref() { + SignatureSource::OstreeRemote(remote.to_string()) + } else { + SignatureSource::ContainerPolicy + }; + let target_imgref = if let Some(imgref) = opts.target_imgref.as_ref() { + let transport = ostree_container::Transport::try_from(opts.target_transport.as_str())?; + let imgref = ostree_container::ImageReference { + transport, + name: imgref.to_string(), + }; + ostree_container::OstreeImageReference { + sigverify: 
target_sigverify, + imgref, + } + } else { + ostree_container::OstreeImageReference { + sigverify: target_sigverify, + imgref: ostree_container::ImageReference { + transport: ostree_container::Transport::Registry, + name: containerstate.image.clone(), + }, + } + }; + + // TODO: make configurable? + let stateroot = STATEROOT_DEFAULT; + Task::new_and_run( + "Initializing ostree layout", + "ostree", + ["admin", "init-fs", "--modern", rootfs.as_str()], + )?; + + let repopath = &rootfs.join("ostree/repo"); + for (k, v) in [("sysroot.bootloader", "none"), ("sysroot.readonly", "true")] { + Task::new_and_run( + "Configuring ostree repo", + "ostree", + ["config", "--repo", repopath.as_str(), "set", k, v], + )?; + } + Task::new_and_run( + "Initializing sysroot", + "ostree", + ["admin", "os-init", stateroot, "--sysroot", rootfs.as_str()], + )?; + + // Ensure everything in the ostree repo is labeled + lsm_label(&rootfs.join("ostree"), "/usr".into(), true)?; + + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path(rootfs))); + sysroot.load(cancellable)?; + + // We need to fetch the container image from the root mount namespace + let skopeo_cmd = run_in_host_mountns("skopeo"); + let proxy_cfg = ostree_container::store::ImageProxyConfig { + skopeo_cmd: Some(skopeo_cmd), + ..Default::default() + }; + + let mut temporary_dir = None; + let src_imageref = if skopeo_supports_containers_storage()? 
{ + src_imageref + } else { + let td = tempfile::tempdir_in("/var/tmp")?; + let path: &Utf8Path = td.path().try_into().unwrap(); + let r = copy_to_oci(&src_imageref, path)?; + temporary_dir = Some(td); + r + }; + + #[allow(clippy::needless_update)] + let options = ostree_container::deploy::DeployOpts { + kargs: Some(kargs), + target_imgref: Some(&target_imgref), + proxy_cfg: Some(proxy_cfg), + ..Default::default() + }; + println!("Creating initial deployment"); + let state = + ostree_container::deploy::deploy(&sysroot, stateroot, &src_imageref, Some(options)).await?; + let target_image = target_imgref.to_string(); + let digest = state.manifest_digest; + println!("Installed: {target_image}"); + println!(" Digest: {digest}"); + + drop(temporary_dir); + + let uname = cap_std_ext::rustix::process::uname(); + + let aleph = InstallAleph { + image: src_image, + kernel: uname.release().to_str()?.to_string(), + }; + + Ok(aleph) +} + +#[context("Copying to oci")] +fn copy_to_oci( + src_imageref: &ostree_container::OstreeImageReference, + dir: &Utf8Path, +) -> Result { + tracing::debug!("Copying {src_imageref}"); + let src_imageref = &src_imageref.imgref.to_string(); + let dest_imageref = ostree_container::ImageReference { + transport: ostree_container::Transport::OciDir, + name: dir.to_string(), + }; + let dest_imageref_str = dest_imageref.to_string(); + Task::new_cmd( + "Copying to temporary OCI (skopeo is too old)", + run_in_host_mountns("skopeo"), + ) + .args([ + "copy", + // TODO: enable this once ostree is fixed "--dest-oci-accept-uncompressed-layers", + src_imageref.as_str(), + dest_imageref_str.as_str(), + ]) + .run()?; + Ok(ostree_container::OstreeImageReference { + sigverify: SignatureSource::ContainerPolicyAllowInsecure, + imgref: dest_imageref, + }) +} + +#[context("Querying skopeo version")] +fn skopeo_supports_containers_storage() -> Result { + let o = run_in_host_mountns("skopeo").arg("--version").output()?; + let st = o.status; + if !st.success() { + 
anyhow::bail!("Failed to run skopeo --version: {st:?}"); + } + let stdout = String::from_utf8(o.stdout).context("Parsing skopeo version")?; + let mut v = stdout + .strip_prefix("skopeo version ") + .map(|v| v.split('.')) + .ok_or_else(|| anyhow::anyhow!("Unexpected output from skopeo version"))?; + let major = v + .next() + .ok_or_else(|| anyhow::anyhow!("Missing major version"))?; + let minor = v + .next() + .ok_or_else(|| anyhow::anyhow!("Missing minor version"))?; + let (major, minor) = (major.parse::()?, minor.parse::()?); + Ok(major > 1 || minor > 10) +} + +struct RootSetup { + device: Utf8PathBuf, + rootfs: Utf8PathBuf, + boot_uuid: uuid::Uuid, + kargs: Vec, +} + +#[context("Creating rootfs")] +fn install_create_rootfs(state: &State) -> Result { + let opts = &state.opts; + // Verify that the target is empty (if not already wiped in particular, but it's + // also good to verify that the wipe worked) + let device = crate::blockdev::list_dev(&opts.device)?; + + // Handle wiping any existing data + if opts.wipe { + let dev = &opts.device; + for child in device.children.iter().flatten() { + let child = child.path(); + println!("Wiping {child}"); + crate::blockdev::wipefs(Utf8Path::new(&child))?; + } + println!("Wiping {dev}"); + crate::blockdev::wipefs(dev)?; + } else if device.has_children() { + anyhow::bail!( + "Detected existing partitions on {}; use e.g. `wipefs` if you intend to overwrite", + opts.device + ); + } + + // Now at this point, our /dev is a stale snapshot because we don't have udev running. + // So from hereon after, we prefix devices with our temporary devtmpfs mount. + let reldevice = opts + .device + .strip_prefix("/dev/") + .context("Absolute device path in /dev/ required")?; + let device = state.devdir.join(reldevice); + + let root_size = opts + .root_size + .as_deref() + .map(crate::blockdev::parse_size_mib) + .transpose() + .context("Parsing root size")? 
+ .unwrap_or(ROOTFS_SIZE_MB as u64); + + // Create a temporary directory to use for mount points. Note that we're + // in a mount namespace, so these should not be visible on the host. + let rootfs = state.mntdir.join("rootfs"); + std::fs::create_dir_all(&rootfs)?; + let bootfs = state.mntdir.join("boot"); + std::fs::create_dir_all(&bootfs)?; + + // Run sgdisk to create partitions. + let mut sgdisk = Task::new("Initializing partitions", "sgdisk"); + // sgdisk is too verbose + sgdisk.cmd.stdout(Stdio::null()); + sgdisk.cmd.arg("-Z"); + sgdisk.cmd.arg(&device); + sgdisk.cmd.args(["-U", "R"]); + #[allow(unused_assignments)] + if cfg!(target_arch = "x86_64") { + // BIOS-BOOT + sgdisk_partition( + &mut sgdisk.cmd, + 1, + "0:+1M", + "BIOS-BOOT", + Some("21686148-6449-6E6F-744E-656564454649"), + ); + } else if cfg!(target_arch = "aarch64") { + // reserved + sgdisk_partition( + &mut sgdisk.cmd, + 1, + "0:+1M", + "reserved", + Some("8DA63339-0007-60C0-C436-083AC8230908"), + ); + } else { + anyhow::bail!("Unsupported architecture: {}", std::env::consts::ARCH); + } + + let espdev = if cfg!(any(target_arch = "x86_64", target_arch = "aarch64")) { + sgdisk_partition( + &mut sgdisk.cmd, + EFIPN, + format!("0:+{EFIPN_SIZE_MB}M"), + "EFI-SYSTEM", + Some("C12A7328-F81F-11D2-BA4B-00A0C93EC93B"), + ); + Some(format!("{device}{EFIPN}")) + } else { + None + }; + + sgdisk_partition( + &mut sgdisk.cmd, + BOOTPN, + format!("0:+{BOOTPN_SIZE_MB}M"), + "boot", + None, + ); + sgdisk_partition( + &mut sgdisk.cmd, + ROOTPN, + format!("0:{root_size}M"), + "root", + Some("0FC63DAF-8483-4772-8E79-3D69D8477DE4"), + ); + sgdisk.run()?; + + // Reread the partition table + { + let mut f = std::fs::OpenOptions::new() + .write(true) + .open(&device) + .with_context(|| format!("opening {device}"))?; + crate::blockdev::reread_partition_table(&mut f, true) + .context("Rereading partition table")?; + } + + crate::blockdev::udev_settle()?; + + match opts.block_setup { + BlockSetup::Direct => {} + // TODO + 
BlockSetup::Tpm2Luks => anyhow::bail!("tpm2-luks is not implemented yet"), + } + + // Initialize the /boot filesystem + let bootdev = &format!("{device}{BOOTPN}"); + let boot_uuid = + mkfs(bootdev, Filesystem::Ext4, Some("boot"), []).context("Initializing /boot")?; + + // Initialize rootfs + let rootdev = &format!("{device}{ROOTPN}"); + let root_uuid = mkfs(rootdev, opts.filesystem, Some("root"), [])?; + let rootarg = format!("root=UUID={root_uuid}"); + let bootarg = format!("boot=UUID={boot_uuid}"); + let kargs = vec![rootarg, RW_KARG.to_string(), bootarg]; + + mount(rootdev, &rootfs)?; + lsm_label(&rootfs, "/".into(), false)?; + let bootfs = rootfs.join("boot"); + std::fs::create_dir(&bootfs).context("Creating /boot")?; + // The underlying directory on the root should be labeled + lsm_label(&bootfs, "/boot".into(), false)?; + mount(bootdev, &bootfs)?; + // And we want to label the root mount of /boot + lsm_label(&bootfs, "/boot".into(), false)?; + + // Create the EFI system partition, if applicable + if let Some(espdev) = espdev { + Task::new_and_run( + "Creating ESP filesystem", + "mkfs.fat", + [espdev.as_str(), "-n", "EFI-SYSTEM"], + )?; + let efifs_path = bootfs.join("efi"); + std::fs::create_dir(&efifs_path).context("Creating efi dir")?; + mount(&espdev, &efifs_path)?; + } + + Ok(RootSetup { + device, + rootfs, + boot_uuid, + kargs, + }) +} + +struct SourceData { + /// The embedded base OSTree commit checksum + #[allow(dead_code)] + commit: String, + /// Whether or not SELinux appears to be enabled in the source commit + selinux: bool, +} + +#[context("Gathering source data")] +fn gather_source_data() -> Result { + let cancellable = ostree::gio::Cancellable::NONE; + let commit = Task::new("Reading ostree commit", "ostree") + .args(["--repo=/ostree/repo", "rev-parse", "--single"]) + .quiet() + .read()?; + let root = cap_std::fs::Dir::open_ambient_dir("/", cap_std::ambient_authority())?; + let repo = ostree::Repo::open_at_dir(&root, "ostree/repo")?; + let root 
= repo + .read_commit(commit.trim(), cancellable) + .context("Reading commit")? + .0; + let root = root.downcast_ref::().unwrap(); + let xattrs = root.xattrs(cancellable)?; + let selinux = crate::lsm::xattrs_have_selinux(&xattrs); + Ok(SourceData { commit, selinux }) +} + +/// Implementation of the `bootc install` CLI command. +pub(crate) async fn install(opts: InstallOpts) -> Result<()> { + // This command currently *must* be run inside a privileged container. + let container_state = crate::containerenv::get_container_execution_info()?; + + // We require --pid=host + let pid = std::fs::read_link("/proc/1/exe").context("reading /proc/1/exe")?; + let pid = pid + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 /proc/1/exe"))?; + if !pid.contains("systemd") { + anyhow::bail!("This command must be run with --pid=host") + } + + // Even though we require running in a container, the mounts we create should be specific + // to this process, so let's enter a private mountns to avoid leaking them. + if std::env::var_os("BOOTC_SKIP_UNSHARE").is_none() { + super::cli::ensure_self_unshared_mount_namespace().await?; + } + + // Let's ensure we have a tmpfs on /tmp, because we need that to write the SELinux label + // (it won't work on the default overlayfs) + Task::new("Creating tmpfs on /tmp", "mount") + .quiet() + .args(["-t", "tmpfs", "tmpfs", "/tmp"]) + .run()?; + + // Now, deal with SELinux state. + let srcdata = gather_source_data()?; + let mut override_disable_selinux = false; + // If the target state has SELinux enabled, we need to check the host state. 
+ if srcdata.selinux { + let host_selinux = crate::lsm::selinux_enabled()?; + tracing::debug!("Target has SELinux, host={host_selinux}"); + if host_selinux { + crate::lsm::selinux_ensure_install()?; + } else if opts.disable_selinux { + override_disable_selinux = true; + println!("notice: Target has SELinux enabled, overriding to disable") + } else { + anyhow::bail!( + "Host kernel does not have SELinux support, but target enables it by default" + ); + } + } else { + tracing::debug!("Target does not enable SELinux"); + } + + // Because SELinux enablement status is cached process-wide and was very likely + // already queried by something else (e.g. glib's constructor), we need to mount + // selinuxfs now if needed, then re-exec *again*. + if srcdata.selinux && !override_disable_selinux { + crate::lsm::container_setup_selinux()?; + } + + // Create our global (read-only) state which gets wrapped in an Arc + // so we can pass it to worker threads too. Right now this just + // combines our command line options along with some bind mounts from the host. + let run_bootc = Utf8Path::new(RUN_BOOTC); + let mntdir = run_bootc.join("mounts"); + if mntdir.exists() { + std::fs::remove_dir_all(&mntdir)?; + } + let devdir = mntdir.join("dev"); + std::fs::create_dir_all(&devdir)?; + Task::new_and_run( + "Mounting devtmpfs", + "mount", + ["devtmpfs", "-t", "devtmpfs", devdir.as_str()], + )?; + // Overmount /var/tmp with the host's, so we can use it to share state + bind_mount_from_host("/var/tmp", "/var/tmp")?; + let state = Arc::new(State { + mntdir, + devdir, + opts, + }); + + // This is all blocking stuff + let rootfs = { + let state = state.clone(); + tokio::task::spawn_blocking(move || install_create_rootfs(&state)).await?? 
+ }; + let mut kargs = rootfs.kargs.iter().map(|v| v.as_str()).collect::>(); + if override_disable_selinux { + kargs.push("selinux=0"); + } + // This is interpreted by our GRUB fragment + if state.opts.ignition_file.is_some() { + kargs.push(crate::ignition::PLATFORM_METAL_KARG); + kargs.push(crate::bootloader::IGNITION_VARIABLE); + } + + let aleph = initialize_ostree_root_from_self( + &state, + &container_state, + &rootfs.rootfs, + kargs.as_slice(), + ) + .await?; + + let aleph = serde_json::to_string(&aleph)?; + std::fs::write(rootfs.rootfs.join(BOOTC_ALEPH_PATH), aleph).context("Writing aleph version")?; + + crate::bootloader::install_via_bootupd(&rootfs.device, &rootfs.rootfs, &rootfs.boot_uuid)?; + + // If Ignition is specified, enable it + if let Some(ignition_file) = state.opts.ignition_file.as_deref() { + let src = std::fs::File::open(ignition_file) + .with_context(|| format!("Opening {ignition_file}"))?; + let bootfs = rootfs.rootfs.join("boot"); + crate::ignition::write_ignition(&bootfs, &state.opts.ignition_hash, &src)?; + crate::ignition::enable_firstboot(&bootfs)?; + println!("Installed Ignition config from {ignition_file}"); + } + + Task::new_and_run( + "Setting root immutable bit", + "chattr", + ["+i", rootfs.rootfs.as_str()], + )?; + + Task::new_and_run("Trimming filesystems", "fstrim", ["-a", "-v"])?; + + let bootfs = rootfs.rootfs.join("boot"); + for fs in [bootfs.as_path(), rootfs.rootfs.as_path()] { + let fsname = fs.file_name().unwrap(); + Task::new(&format!("Finalizing filesystem {fsname}"), "mount") + .args(["-o", "remount,ro", fs.as_str()]) + .run()?; + for a in ["-f", "-u"] { + Task::new("Flushing filesystem journal", "xfs_freeze") + .quiet() + .args([a, fs.as_str()]) + .run()?; + } + } + + Task::new_and_run( + "Unmounting filesystems", + "umount", + ["-R", rootfs.rootfs.as_str()], + )?; + + println!("Installation complete!"); + + Ok(()) +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs index f89f1b33..79d363d6 100644 --- a/lib/src/lib.rs +++ 
b/lib/src/lib.rs @@ -13,11 +13,19 @@ #![deny(clippy::dbg_macro)] #![deny(clippy::todo)] +mod blockdev; +mod bootloader; pub mod cli; +mod containerenv; +pub(crate) mod ignition; +mod install; +mod lsm; +mod podman; #[cfg(feature = "internal-testing-api")] mod privtests; mod reexec; mod status; +mod task; mod utils; #[cfg(feature = "docgen")] diff --git a/lib/src/lsm.rs b/lib/src/lsm.rs new file mode 100644 index 00000000..7a8cd632 --- /dev/null +++ b/lib/src/lsm.rs @@ -0,0 +1,108 @@ +use std::os::unix::process::CommandExt; +use std::path::Path; +use std::process::Command; + +use anyhow::{Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use fn_error_context::context; +use gvariant::{aligned_bytes::TryAsAligned, Marker, Structure}; +use ostree_ext::ostree; + +use crate::task::Task; + +/// The mount path for selinux +const SELINUXFS: &str = "/sys/fs/selinux"; +/// The SELinux xattr +const SELINUX_XATTR: &[u8] = b"security.selinux\0"; + +#[context("Querying selinux availability")] +pub(crate) fn selinux_enabled() -> Result { + let filesystems = std::fs::read_to_string("/proc/filesystems")?; + Ok(filesystems.contains("selinuxfs\n")) +} + +#[context("Ensuring selinux install_t type")] +pub(crate) fn selinux_ensure_install() -> Result<()> { + let guardenv = "_bootc_selinuxfs_mounted"; + if let Some(p) = std::env::var_os(guardenv) { + let p = Path::new(&p); + if p.exists() { + tracing::debug!("Removing temporary file"); + std::fs::remove_file(p).context("Removing {p:?}")?; + } + return Ok(()); + } + tracing::debug!("Copying self to temporary file for re-exec"); + // OK now, we always copy our binary to a tempfile, set its security context + // to match that of /usr/bin/ostree, and then re-exec. 
This is really a gross + // hack; we can't always rely on https://github.com/fedora-selinux/selinux-policy/pull/1500/commits/67eb283c46d35a722636d749e5b339615fe5e7f5 + let mut tmpf = tempfile::NamedTempFile::new()?; + let mut src = std::fs::File::open("/proc/self/exe")?; + let meta = src.metadata()?; + std::io::copy(&mut src, &mut tmpf).context("Copying self to tempfile for selinux re-exec")?; + tmpf.as_file_mut() + .set_permissions(meta.permissions()) + .context("Setting permissions of tempfile")?; + let tmpf: Utf8PathBuf = tmpf.keep()?.1.try_into().unwrap(); + lsm_label(&tmpf, "/usr/bin/ostree".into(), false)?; + tracing::debug!("Created {tmpf:?}"); + + let mut cmd = Command::new(&tmpf); + cmd.env(guardenv, tmpf); + cmd.args(std::env::args_os().skip(1)); + Err(anyhow::Error::msg(cmd.exec()).context("execve")) +} + +/// Ensure that /sys/fs/selinux is mounted, and ensure we're running +/// as install_t. +#[context("Ensuring selinux mount")] +pub(crate) fn container_setup_selinux() -> Result<()> { + let path = Utf8Path::new(SELINUXFS); + if !path.join("enforce").exists() { + if !path.exists() { + std::fs::create_dir(path)?; + } + Task::new("Mounting selinuxfs", "mount") + .args(["selinuxfs", "-t", "selinuxfs", path.as_str()]) + .run()?; + } + + selinux_ensure_install() +} + +fn selinux_label_for_path(target: &str) -> Result { + // TODO: detect case where SELinux isn't enabled + let o = Command::new("matchpathcon").args(["-n", target]).output()?; + let st = o.status; + if !st.success() { + anyhow::bail!("matchpathcon failed: {st:?}"); + } + let label = String::from_utf8(o.stdout)?; + let label = label.trim(); + Ok(label.to_string()) +} + +// Write filesystem labels (currently just for SELinux) +#[context("Labeling {as_path}")] +pub(crate) fn lsm_label(target: &Utf8Path, as_path: &Utf8Path, recurse: bool) -> Result<()> { + let label = selinux_label_for_path(as_path.as_str())?; + Task::new("Setting SELinux security context (chcon)", "chcon") + .quiet() + .args(["-h"]) 
+ .args(recurse.then_some("-R")) + .args(["-h", label.as_str(), target.as_str()]) + .run() +} + +pub(crate) fn xattrs_have_selinux(xattrs: &ostree::glib::Variant) -> bool { + let v = xattrs.data_as_bytes(); + let v = v.try_as_aligned().unwrap(); + let v = gvariant::gv!("a(ayay)").cast(v); + for xattr in v.iter() { + let k = xattr.to_tuple().0; + if k == SELINUX_XATTR { + return true; + } + } + false +} diff --git a/lib/src/podman.rs b/lib/src/podman.rs new file mode 100644 index 00000000..03c0fab9 --- /dev/null +++ b/lib/src/podman.rs @@ -0,0 +1,27 @@ +use anyhow::{anyhow, Result}; +use serde::Deserialize; + +use crate::utils::run_in_host_mountns; + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct Inspect { + pub(crate) digest: String, +} + +/// Given an image ID, return its manifest digest +pub(crate) fn imageid_to_digest(imgid: &str) -> Result { + let o = run_in_host_mountns("podman") + .args(["inspect", imgid]) + .output()?; + let st = o.status; + if !st.success() { + anyhow::bail!("Failed to execute podman inspect: {st:?}"); + } + let o: Vec = serde_json::from_slice(&o.stdout)?; + let i = o + .into_iter() + .next() + .ok_or_else(|| anyhow!("No images returned for inspect"))?; + Ok(i.digest) +} diff --git a/lib/src/privtests.rs b/lib/src/privtests.rs index be639c8c..b260e388 100644 --- a/lib/src/privtests.rs +++ b/lib/src/privtests.rs @@ -3,6 +3,7 @@ use std::process::Command; use anyhow::Result; use camino::{Utf8Path, Utf8PathBuf}; use cap_std_ext::rustix; +use fn_error_context::context; use rustix::fd::AsFd; use xshell::{cmd, Shell}; @@ -42,7 +43,8 @@ fn init_ostree(sh: &Shell, rootfs: &Utf8Path) -> Result<()> { Ok(()) } -pub(crate) fn impl_run() -> Result<()> { +#[context("bootc status")] +fn run_bootc_status() -> Result<()> { let sh = Shell::new()?; let loopdev = LoopbackDevice::new_temp(&sh)?; @@ -54,17 +56,45 @@ pub(crate) fn impl_run() -> Result<()> { let td: &Utf8Path = td.try_into()?; cmd!(sh, "mkfs.xfs {devpath}").run()?; 
- cmd!(sh, "mount {devpath} {td}").run()?; init_ostree(&sh, td)?; + // Basic sanity test of `bootc status` on an uninitialized root let _g = sh.push_env("OSTREE_SYSROOT", td); cmd!(sh, "bootc status").run()?; Ok(()) } +// This needs nontrivial work for loopback devices +// #[context("bootc install")] +// fn run_bootc_install() -> Result<()> { +// let sh = Shell::new()?; +// let loopdev = LoopbackDevice::new_temp(&sh)?; +// let devpath = &loopdev.dev; +// println!("Using {devpath:?}"); + +// let selinux_enabled = crate::lsm::selinux_enabled()?; +// let selinux_opt = if selinux_enabled { +// "" +// } else { +// "--disable-selinux" +// }; + +// cmd!(sh, "bootc install {selinux_opt} {devpath}").run()?; + +// Ok(()) +// } + +pub(crate) fn impl_run() -> Result<()> { + run_bootc_status()?; + println!("ok bootc status"); + //run_bootc_install()?; + //println!("ok bootc install"); + Ok(()) +} + pub(crate) async fn run(opts: &TestingOpts) -> Result<()> { match opts { TestingOpts::RunPrivilegedIntegration {} => tokio::task::spawn_blocking(impl_run).await?, diff --git a/lib/src/task.rs b/lib/src/task.rs new file mode 100644 index 00000000..b724767e --- /dev/null +++ b/lib/src/task.rs @@ -0,0 +1,86 @@ +use std::{ + ffi::OsStr, + process::{Command, Stdio}, +}; + +use anyhow::{Context, Result}; + +pub(crate) struct Task { + description: String, + quiet: bool, + pub(crate) cmd: Command, +} + +impl Task { + pub(crate) fn new(description: impl AsRef, exe: impl AsRef) -> Self { + Self::new_cmd(description, Command::new(exe.as_ref())) + } + + pub(crate) fn new_cmd(description: impl AsRef, mut cmd: Command) -> Self { + let description = description.as_ref().to_string(); + // Default to noninteractive + cmd.stdin(Stdio::null()); + Self { + description, + quiet: false, + cmd, + } + } + + pub(crate) fn quiet(mut self) -> Self { + self.quiet = true; + self + } + + pub(crate) fn args>(mut self, args: impl IntoIterator) -> Self { + self.cmd.args(args); + self + } + + /// Run the command, 
returning an error if the command does not exit successfully. + pub(crate) fn run(self) -> Result<()> { + let description = self.description; + let mut cmd = self.cmd; + if !self.quiet { + println!("{description}"); + } + tracing::debug!("exec: {cmd:?}"); + let st = cmd.status()?; + if !st.success() { + anyhow::bail!("Task {description} failed: {st:?}"); + } + Ok(()) + } + + /// Like [`run()`], but return stdout. + pub(crate) fn read(self) -> Result { + let description = self.description; + let mut cmd = self.cmd; + if !self.quiet { + println!("{description}"); + } + tracing::debug!("exec: {cmd:?}"); + cmd.stdout(Stdio::piped()); + let child = cmd + .spawn() + .with_context(|| format!("Spawning {description} failed"))?; + let o = child + .wait_with_output() + .with_context(|| format!("Executing {description} failed"))?; + let st = o.status; + if !st.success() { + anyhow::bail!("Task {description} failed: {st:?}"); + } + Ok(String::from_utf8(o.stdout)?) + } + + pub(crate) fn new_and_run<'a>( + description: impl AsRef, + exe: impl AsRef, + args: impl IntoIterator, + ) -> Result<()> { + let mut t = Self::new(description.as_ref(), exe); + t.cmd.args(args); + t.run() + } +} diff --git a/lib/src/utils.rs b/lib/src/utils.rs index ced79f75..d1cbb062 100644 --- a/lib/src/utils.rs +++ b/lib/src/utils.rs @@ -1,4 +1,5 @@ use std::fmt::Display; +use std::process::Command; use anyhow::{Context, Result}; use ostree::glib; @@ -46,3 +47,17 @@ where { serializer.collect_str(value) } + +/// Run a command in the host mount namespace +pub(crate) fn run_in_host_mountns(cmd: &str) -> Command { + let mut c = Command::new("nsenter"); + c.args(["-m", "-t", "1", "--", cmd]); + c +} + +/// Given a possibly tagged image like quay.io/foo/bar:latest and a digest 0ab32..., return +/// the digested form quay.io/foo/bar@sha256:0ab32... 
+pub(crate) fn digested_pullspec(image: &str, digest: &str) -> String { + let image = image.rsplit_once(':').map(|v| v.0).unwrap_or(image); + format!("{image}@{digest}") +} diff --git a/tests/kolainst/install b/tests/kolainst/install new file mode 100755 index 00000000..f308d807 --- /dev/null +++ b/tests/kolainst/install @@ -0,0 +1,27 @@ +#!/bin/bash +# Verify install path +## kola: +## timeoutMin: 30 +## tags: "needs-internet" +## platforms: qemu # additionalDisks is only supported on qemu +## additionalDisks: ["20G"] +# +# Copyright (C) 2022 Red Hat, Inc. + +set -xeuo pipefail + +# See https://github.com/cgwalters/bootc-base-images +IMAGE=quay.io/cgwalters/fedora-oscore:latest + +# Always work out of a temporary directory +cd $(mktemp -d) + +case "${AUTOPKGTEST_REBOOT_MARK:-}" in + "") + podman run --rm -ti --privileged --pid=host --net=none -v /usr/bin/bootc:/usr/bin/bootc ${IMAGE} bootc install /dev/vda + # In theory we could e.g. wipe the bootloader setup on the primary disk, then reboot; + # but for now let's just sanity test that the install command executes. + echo "ok install" + ;; + *) echo "unexpected mark: ${AUTOPKGTEST_REBOOT_MARK}"; exit 1;; +esac