Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tempfile: Add support for TempFile #239

Merged
merged 1 commit into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cap-tempfile/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repository = "https://github.com/bytecodealliance/cap-std"
edition = "2018"

[dependencies]
cap-std = { path = "../cap-std", version = "^0.24.1-alpha.0"}
cap-std = { path = "../cap-std", version = "^0.24.1-alpha.0" }
uuid = { version = "0.8.1", features = ["v4"] }
camino = { version = "1.0.5", optional = true }

Expand All @@ -23,6 +23,9 @@ rand = "0.8.1"
[target.'cfg(windows)'.dev-dependencies]
winapi = "0.3.9"

[target.'cfg(not(windows))'.dependencies]
rustix = { version = "0.33.0", features = ["procfs"] }

[features]
default = []
fs_utf8 = ["cap-std/fs_utf8", "camino"]
Expand Down
29 changes: 27 additions & 2 deletions cap-tempfile/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ use uuid::Uuid;
#[cfg(feature = "fs_utf8")]
pub mod utf8;

mod tempfile;
pub use tempfile::*;

#[doc(hidden)]
pub use cap_std::ambient_authority_known_at_compile_time;
pub use cap_std::{ambient_authority, AmbientAuthority};
Expand Down Expand Up @@ -104,7 +107,7 @@ impl TempDir {
mem::take(&mut self.dir).unwrap().remove_open_dir_all()
}

fn new_name() -> String {
pub(crate) fn new_name() -> String {
#[cfg(not(target_os = "emscripten"))]
{
Uuid::new_v4().to_string()
Expand All @@ -120,7 +123,7 @@ impl TempDir {
}
}

const fn num_iterations() -> i32 {
/// Maximum number of candidate names that will be tried before giving up.
pub(crate) const fn num_iterations() -> i32 {
i32::MAX
}

Expand Down Expand Up @@ -177,6 +180,28 @@ pub fn tempdir_in(dir: &Dir) -> io::Result<TempDir> {
TempDir::new_in(dir)
}

/// Invoke `f` with freshly generated temporary names until it succeeds.
///
/// Each attempt receives a new name from `TempDir::new_name()`. A failure whose
/// kind equals `err` is treated as "name unusable" and triggers another attempt;
/// any other failure is returned to the caller immediately.
///
/// On success, returns the value produced by `f` together with the name that
/// was accepted. If `TempDir::num_iterations()` attempts are exhausted, an
/// error of kind `err` is returned.
pub(crate) fn retry_with_name_ignoring<F, T>(
    err: std::io::ErrorKind,
    mut f: F,
) -> io::Result<(T, String)>
where
    F: FnMut(&str) -> io::Result<T>,
{
    for _attempt in 0..TempDir::num_iterations() {
        let candidate = TempDir::new_name();
        let failure = match f(candidate.as_str()) {
            Ok(value) => return Ok((value, candidate)),
            Err(e) => e,
        };
        // Only the designated error kind is retried; everything else is fatal.
        if failure.kind() != err {
            return Err(failure);
        }
    }
    Err(std::io::Error::new(err, "too many temporary files exist"))
}

#[test]
fn drop_tempdir() {
use crate::ambient_authority;
Expand Down
255 changes: 255 additions & 0 deletions cap-tempfile/src/tempfile.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
//! Temporary files.

use cap_std::fs::{Dir, File};
use std::ffi::OsStr;
use std::io::{self, Write};
use std::io::{Read, Seek};

/// A file in a directory that is by default deleted when it goes out
/// of scope, but may also be written persistently.
///
/// This corresponds most closely to [`tempfile::NamedTempFile`]; however,
/// there are some important differences, so read the below carefully
/// to understand how to port existing code.
///
/// # Name-able, but not necessarily named
///
/// By default, the file does not necessarily have a name until the file is written
/// persistently.
///
/// On some operating systems like Linux, it is possible to create anonymous
/// temporary files that can still be written to disk persistently via `O_TMPFILE`.
/// The advantage of this is that if the process (or operating system) crashes
/// while the file is being written, the temporary space will be automatically cleaned up.
/// For this reason, there is no API to retrieve the name, for either case.
cgwalters marked this conversation as resolved.
Show resolved Hide resolved
///
/// To more closely match the semantics of [`tempfile::tempfile`], use [`crate::TempFile::new_anonymous`].
///
/// # File permissions
///
/// Unlike the tempfile crate, the default [`TempFile::new`] will use the same permissions as [`File::create_new`] in
/// the Rust standard library. On Unix for example, this is `0o666` modified by `umask`.
/// The rationale for this is to make it more ergonomic and natural to use this API to atomically create new files
/// and replace existing ones.
///
/// [`tempfile::NamedTempFile`]: https://docs.rs/tempfile/latest/tempfile/struct.NamedTempFile.html
/// [`File::create_new`]: https://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.create_new
pub struct TempFile<'d> {
// Directory the file was created in; also used to rename the file into
// place and to remove it on drop.
dir: &'d Dir,
// Open handle to the temporary file.
fd: File,
// Name of the file within `dir`. `None` means there is no directory entry
// for `Drop` to remove (for example, an anonymous file that has not been
// linked yet).
name: Option<String>,
}

/// Try to create an anonymous file inside `d` using `O_TMPFILE`.
///
/// Returns `Ok(Some(file))` on success and `Ok(None)` when the open fails with
/// `OPNOTSUPP`, `ISDIR` or `NOENT`, so that the caller can fall back to a named
/// file. Every other error is propagated.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn new_tempfile_linux(d: &Dir) -> io::Result<Option<File>> {
    use rustix::fd::FromFd;
    use rustix::fs::{Mode, OFlags};
    // Open read-write rather than write-only: there may be callers that want
    // to read the data back through the same handle.
    let oflags = OFlags::CLOEXEC | OFlags::TMPFILE | OFlags::RDWR;
    // 0o666 matches what the Rust standard library requests for new files; the
    // process umask is applied on top of it:
    // https://github.com/rust-lang/rust/blob/44628f7273052d0bb8e8218518dacab210e1fe0d/library/std/src/sys/unix/fs.rs#L762
    let mode = Mode::from_raw_mode(0o666);
    match rustix::fs::openat(d, ".", oflags, mode) {
        // Happy path - Linux with O_TMPFILE
        Ok(anon) => Ok(Some(File::from_fd(anon.into()))),
        // These errors are treated as "O_TMPFILE unavailable here"; see
        // https://github.com/Stebalien/tempfile/blob/1a40687e06eb656044e3d2dffa1379f04b3ef3fd/src/file/imp/unix.rs#L81
        Err(rustix::io::Error::OPNOTSUPP | rustix::io::Error::ISDIR | rustix::io::Error::NOENT) => {
            Ok(None)
        }
        Err(other) => Err(other.into()),
    }
}

/// Give a currently anonymous `O_TMPFILE` descriptor a random name in `subdir`.
///
/// The descriptor is hard-linked into `subdir` via its entry under the
/// process's own fd directory in procfs. Names that already exist are skipped
/// and a new one is tried. Returns the name that was successfully linked.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn generate_name_in(subdir: &Dir, f: &File) -> io::Result<String> {
    use rustix::fd::AsFd;
    use rustix::fs::AtFlags;
    let proc_fd_dir = rustix::io::proc_self_fd()?;
    // The link source is the decimal fd number, resolved relative to the
    // procfs fd directory opened above.
    let fd_decimal = rustix::path::DecInt::from_fd(&f.as_fd());
    let fd_entry = fd_decimal.as_c_str();
    let (_, linked_name) =
        super::retry_with_name_ignoring(io::ErrorKind::AlreadyExists, |candidate| {
            rustix::fs::linkat(
                &proc_fd_dir,
                fd_entry,
                subdir,
                candidate,
                AtFlags::SYMLINK_FOLLOW,
            )
            .map_err(io::Error::from)
        })?;
    Ok(linked_name)
}

/// Create a new temporary file in the target directory, which may or may not have a (randomly generated) name at this point.
fn new_tempfile(d: &Dir) -> io::Result<(File, Option<String>)> {
// On Linux, try O_TMPFILE
#[cfg(any(target_os = "android", target_os = "linux"))]
if let Some(f) = new_tempfile_linux(d)? {
return Ok((f, None));
}
// Otherwise, fall back to just creating a randomly named file.
let mut opts = cap_std::fs::OpenOptions::new();
opts.read(true);
opts.write(true);
opts.create_new(true);
cgwalters marked this conversation as resolved.
Show resolved Hide resolved
super::retry_with_name_ignoring(io::ErrorKind::AlreadyExists, |name| {
d.open_with(name, &opts)
})
.map(|(f, name)| (f, Some(name)))
}

impl<'d> TempFile<'d> {
    /// Create a new temporary file in the provided directory.
    pub fn new(dir: &'d Dir) -> io::Result<Self> {
        new_tempfile(dir).map(|(fd, name)| Self { dir, fd, name })
    }

    /// Create a new temporary file in the provided directory that will not
    /// have a name. This corresponds to [`tempfile::tempfile_in`].
    ///
    /// If the file had to be created under a name, that name is removed from
    /// the directory before the handle is returned.
    ///
    /// [`tempfile::tempfile_in`]: https://docs.rs/tempfile/latest/tempfile/fn.tempfile_in.html
    pub fn new_anonymous(dir: &'d Dir) -> io::Result<File> {
        match new_tempfile(dir)? {
            (file, None) => Ok(file),
            (file, Some(name)) => {
                dir.remove_file(name)?;
                Ok(file)
            }
        }
    }

    /// Borrow the underlying file.
    pub fn as_file(&self) -> &File {
        &self.fd
    }

    /// Mutably borrow the underlying file.
    pub fn as_file_mut(&mut self) -> &mut File {
        &mut self.fd
    }

    /// Rename the temporary file to `destname` within its directory.
    ///
    /// On failure the temporary name is stored back into `self`, so that
    /// dropping `self` removes the leftover file.
    fn impl_replace(mut self, destname: &OsStr) -> io::Result<()> {
        // On Linux, a file created with O_TMPFILE has no name yet, so it must
        // be given a temporary one before it can be renamed into place. There
        // are patches to add an `AT_LINKAT_REPLACE` API; with that this step
        // could be skipped, giving file-leak-proof atomic replacement:
        // https://marc.info/?l=linux-fsdevel&m=158028833007418&w=2
        #[cfg(any(target_os = "android", target_os = "linux"))]
        let tempname = match self.name.take() {
            Some(existing) => existing,
            None => generate_name_in(self.dir, &self.fd)?,
        };
        // Invariant: anonymous files are only supported on Linux/Android, so
        // on every other target the file must already have a name here.
        #[cfg(not(any(target_os = "android", target_os = "linux")))]
        let tempname = self.name.take().unwrap();
        // Try the rename into place.
        let outcome = self.dir.rename(&tempname, self.dir, destname);
        if outcome.is_err() {
            // Hand the name back to `self` so that `Drop` cleans it up.
            self.name = Some(tempname);
        }
        outcome
    }

    /// Write the file to the target directory with the provided name.
    /// Any existing file will be replaced.
    ///
    /// NOTE(review): nothing in this method changes the file mode, so the
    /// permissions appear to be whatever was applied at creation time —
    /// confirm against the intended contract.
    pub fn replace(self, destname: impl AsRef<OsStr>) -> io::Result<()> {
        self.impl_replace(destname.as_ref())
    }
}

/// Reading is forwarded to the underlying file handle.
impl<'d> Read for TempFile<'d> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        Read::read(&mut self.fd, buf)
    }
}

/// Writing and flushing are forwarded to the underlying file handle.
impl<'d> Write for TempFile<'d> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        Write::write(&mut self.fd, buf)
    }
    #[inline]
    fn flush(&mut self) -> io::Result<()> {
        Write::flush(&mut self.fd)
    }
}

/// Seeking is forwarded to the underlying file handle.
impl<'d> Seek for TempFile<'d> {
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        Seek::seek(&mut self.fd, pos)
    }
}

/// Removes the file from its directory on drop if it still has a name.
impl<'d> Drop for TempFile<'d> {
    fn drop(&mut self) {
        match self.name.take() {
            Some(leftover) => {
                // Best effort: a failure to remove cannot be reported from `drop`.
                let _ = self.dir.remove_file(leftover);
            }
            None => {}
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::io;

    /// Read the current process umask from `/proc/self/status`.
    ///
    /// On Unix, calling `umask()` actually *mutates* the process global state,
    /// so the value is parsed from Linux `/proc` instead.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    fn get_process_umask() -> io::Result<u32> {
        use io::BufRead;
        let reader = io::BufReader::new(std::fs::File::open("/proc/self/status")?);
        for entry in reader.lines() {
            let entry = entry?;
            // Each line is split at the first ':'; only the `Umask` key matters,
            // and its trimmed value is parsed as octal.
            if let Some(("Umask", octal)) = entry.split_once(':') {
                return Ok(u32::from_str_radix(octal.trim(), 8).unwrap());
            }
        }
        panic!("Could not determine process umask")
    }

    #[test]
    fn test_tempfile() -> io::Result<()> {
        use crate::ambient_authority;

        let td = crate::tempdir(ambient_authority())?;

        // Base case: dropping an untouched temp file leaves nothing behind.
        drop(TempFile::new(&td).unwrap());
        assert_eq!(td.entries()?.count(), 0);

        let mut scratch = TempFile::new(&td)?;
        // The file must have been created with 0o666 filtered by the umask.
        #[cfg(any(target_os = "android", target_os = "linux"))]
        {
            use rustix::fs::{MetadataExt, Mode};
            let expected = 0o666 & !get_process_umask()?;
            let raw_mode = scratch.as_file().metadata().unwrap().mode();
            assert_eq!(expected, Mode::from_bits_truncate(raw_mode).bits());
        }
        // Writing works, and dropping afterwards still cleans up.
        scratch.write_all(b"hello world")?;
        drop(scratch);
        assert_eq!(td.entries()?.count(), 0);

        // Persisting the file leaves exactly one entry with the written data.
        let mut kept = TempFile::new(&td)?;
        kept.write_all(b"hello world")?;
        kept.replace("testfile").unwrap();
        assert_eq!(td.entries()?.count(), 1);

        assert_eq!(td.read("testfile")?, b"hello world");

        td.close()
    }
}