Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for securing tty I/O. #684

Merged
merged 15 commits into from
Jan 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions crates/wasi-common/src/fdentry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ impl FdEntry {
Ok(())
}
}

/// Test whether this descriptor is considered a tty within WASI.
/// Note that since WASI itself lacks an `isatty` syscall and relies
/// on a conservative approximation, we use the same approximation here.
pub(crate) fn isatty(&self) -> bool {
self.file_type == wasi::__WASI_FILETYPE_CHARACTER_DEVICE
&& (self.rights_base & (wasi::__WASI_RIGHTS_FD_SEEK | wasi::__WASI_RIGHTS_FD_TELL)) == 0
}
}

/// This allows an `OsHandle` to be temporarily borrowed from a
Expand Down
32 changes: 24 additions & 8 deletions crates/wasi-common/src/hostcalls_impl/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::ctx::WasiCtx;
use crate::fdentry::{Descriptor, FdEntry};
use crate::helpers::*;
use crate::memory::*;
use crate::sandboxed_tty_writer::SandboxedTTYWriter;
use crate::sys::fdentry_impl::determine_type_rights;
use crate::sys::hostcalls_impl::fs_helpers::path_open_rights;
use crate::sys::{host_impl, hostcalls_impl};
Expand All @@ -12,6 +13,7 @@ use filetime::{set_file_handle_times, FileTime};
use log::trace;
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::ops::DerefMut;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

pub(crate) unsafe fn fd_close(
Expand Down Expand Up @@ -168,7 +170,7 @@ pub(crate) unsafe fn fd_read(
.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READ, 0)?
{
Descriptor::OsHandle(file) => file.read_vectored(&mut iovs),
Descriptor::Stdin => io::stdin().lock().read_vectored(&mut iovs),
Descriptor::Stdin => io::stdin().read_vectored(&mut iovs),
sunfishcode marked this conversation as resolved.
Show resolved Hide resolved
_ => return Err(Error::EBADF),
};

Expand Down Expand Up @@ -372,21 +374,35 @@ pub(crate) unsafe fn fd_write(
let iovs: Vec<io::IoSlice> = iovs.iter().map(|vec| host::ciovec_to_host(vec)).collect();

// perform unbuffered writes
let host_nwritten = match wasi_ctx
.get_fd_entry_mut(fd)?
.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?
{
Descriptor::OsHandle(file) => file.write_vectored(&iovs)?,
let entry = wasi_ctx.get_fd_entry_mut(fd)?;
let isatty = entry.isatty();
let desc = entry.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?;
let host_nwritten = match desc {
Descriptor::OsHandle(file) => {
if isatty {
SandboxedTTYWriter::new(file.deref_mut()).write_vectored(&iovs)?
} else {
file.write_vectored(&iovs)?
}
}
Descriptor::Stdin => return Err(Error::EBADF),
Descriptor::Stdout => {
// lock for the duration of the scope
let stdout = io::stdout();
let mut stdout = stdout.lock();
let nwritten = stdout.write_vectored(&iovs)?;
let nwritten = if isatty {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we shouldn't only sanitize TTY output resp. handle terminal output in a different way than other file based output, because malicious control sequences e.g. in log files will do exactly the same harm to the host system, whenever they are viewed by a cat command etc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the PR here as fixing what we can given the current WASI API. A more complete fix will require changing the WASI spec, which I'm also looking into.

SandboxedTTYWriter::new(&mut stdout).write_vectored(&iovs)?
} else {
stdout.write_vectored(&iovs)?
};
stdout.flush()?;
nwritten
}
Descriptor::Stderr => io::stderr().lock().write_vectored(&iovs)?,
// Always sanitize stderr, even if it's not directly connected to a tty,
// because stderr is meant for diagnostics rather than binary output,
// and may be redirected to a file which could end up being displayed
// on a tty later.
Descriptor::Stderr => SandboxedTTYWriter::new(&mut io::stderr()).write_vectored(&iovs)?,
};

trace!(" | *nwritten={:?}", host_nwritten);
Expand Down
1 change: 1 addition & 0 deletions crates/wasi-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ mod error;
mod fdentry;
mod helpers;
mod hostcalls_impl;
mod sandboxed_tty_writer;
mod sys;
#[macro_use]
mod macros;
Expand Down
8 changes: 8 additions & 0 deletions crates/wasi-common/src/old/snapshot_0/fdentry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ impl FdEntry {
Ok(())
}
}

/// Test whether this descriptor is considered a tty within WASI.
/// Note that since WASI itself lacks an `isatty` syscall and relies
/// on a conservative approximation, we use the same approximation here.
pub(crate) fn isatty(&self) -> bool {
self.file_type == wasi::__WASI_FILETYPE_CHARACTER_DEVICE
&& (self.rights_base & (wasi::__WASI_RIGHTS_FD_SEEK | wasi::__WASI_RIGHTS_FD_TELL)) == 0
}
}

/// This allows an `OsHandle` to be temporarily borrowed from a
Expand Down
32 changes: 24 additions & 8 deletions crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ use crate::old::snapshot_0::sys::fdentry_impl::determine_type_rights;
use crate::old::snapshot_0::sys::hostcalls_impl::fs_helpers::path_open_rights;
use crate::old::snapshot_0::sys::{host_impl, hostcalls_impl};
use crate::old::snapshot_0::{helpers, host, wasi, wasi32, Error, Result};
use crate::sandboxed_tty_writer::SandboxedTTYWriter;
use filetime::{set_file_handle_times, FileTime};
use log::trace;
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::ops::DerefMut;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

pub(crate) unsafe fn fd_close(wasi_ctx: &mut WasiCtx, fd: wasi::__wasi_fd_t) -> Result<()> {
Expand Down Expand Up @@ -160,7 +162,7 @@ pub(crate) unsafe fn fd_read(
.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READ, 0)?
{
Descriptor::OsHandle(file) => file.read_vectored(&mut iovs),
Descriptor::Stdin => io::stdin().lock().read_vectored(&mut iovs),
Descriptor::Stdin => io::stdin().read_vectored(&mut iovs),
_ => return Err(Error::EBADF),
};

Expand Down Expand Up @@ -357,21 +359,35 @@ pub(crate) unsafe fn fd_write(
let iovs: Vec<io::IoSlice> = iovs.iter().map(|vec| host::ciovec_to_host(vec)).collect();

// perform unbuffered writes
let host_nwritten = match wasi_ctx
.get_fd_entry_mut(fd)?
.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?
{
Descriptor::OsHandle(file) => file.write_vectored(&iovs)?,
let entry = wasi_ctx.get_fd_entry_mut(fd)?;
let isatty = entry.isatty();
let desc = entry.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?;
let host_nwritten = match desc {
Descriptor::OsHandle(file) => {
if isatty {
SandboxedTTYWriter::new(file.deref_mut()).write_vectored(&iovs)?
} else {
file.write_vectored(&iovs)?
}
}
Descriptor::Stdin => return Err(Error::EBADF),
Descriptor::Stdout => {
// lock for the duration of the scope
let stdout = io::stdout();
let mut stdout = stdout.lock();
let nwritten = stdout.write_vectored(&iovs)?;
let nwritten = if isatty {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be an option to wasmtime (security strict mode vs lesser safe mode). Having the asymmetry behavior between stdout/stderr seems weird. stderr could still be used for binary data.

I do see it could be common to pipe raw/binary data into a file from wasmtime - but maybe it could be ok to require the wasmtime caller to explicitly enable this with a command line option.

I just fell security should take precedence over convenience. I guess wasmtime could just fail with nice error message when it sees escape codes to stdout/stderr (maybe also tty) in the output so the caller at least gets an explanation of why it failed.

Also, we do NOT know what the calling process of wasmtime does with the stdout/stderr handed to wasmtime. Could end up forwarding the raw data to its terminal stdout/stderr which is hidden to wasmtime.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that there's room for improvement in these areas. I'm working on a more comprehensive design, including changes to WASI itself, which I hope will address these concerns.

SandboxedTTYWriter::new(&mut stdout).write_vectored(&iovs)?
} else {
stdout.write_vectored(&iovs)?
};
stdout.flush()?;
nwritten
}
Descriptor::Stderr => io::stderr().lock().write_vectored(&iovs)?,
// Always sanitize stderr, even if it's not directly connected to a tty,
// because stderr is meant for diagnostics rather than binary output,
// and may be redirected to a file which could end up being displayed
// on a tty later.
Descriptor::Stderr => SandboxedTTYWriter::new(&mut io::stderr()).write_vectored(&iovs)?,
};

trace!(" | *nwritten={:?}", host_nwritten);
Expand Down
178 changes: 178 additions & 0 deletions crates/wasi-common/src/sandboxed_tty_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
use std::io::{Result, Write};

/// An adapter around a `Write` stream that guarantees that its output
/// is valid UTF-8 and contains no control characters. It does this by
/// replacing characters with inert control pictures and replacement
/// characters.
pub(crate) struct SandboxedTTYWriter<'writer, Writer>
where
Writer: Write,
{
inner: &'writer mut Writer,
}

impl<'writer, Writer> SandboxedTTYWriter<'writer, Writer>
where
Writer: Write,
{
/// Construct a new `SandboxedTTYWriter` with the given inner `Writer`.
pub(crate) fn new(inner: &'writer mut Writer) -> Self {
Self { inner }
}

/// Write a single character to the output.
pub(crate) fn write_char(&mut self, c: char) -> Result<()> {
self.inner.write(
match c {
'\u{0000}' => '␀',
'\u{0001}' => '␁',
'\u{0002}' => '␂',
'\u{0003}' => '␃',
'\u{0004}' => '␄',
'\u{0005}' => '␅',
'\u{0006}' => '␆',
'\u{0007}' => '␇',
'\u{0008}' => '␈',
'\u{0009}' => '\t',
'\u{000A}' => '\n',
'\u{000B}' => '␋',
'\u{000C}' => '␌',
'\u{000D}' => '\r',
'\u{000E}' => '␎',
'\u{000F}' => '␏',
'\u{0010}' => '␐',
'\u{0011}' => '␑',
'\u{0012}' => '␒',
'\u{0013}' => '␓',
'\u{0014}' => '␔',
'\u{0015}' => '␕',
'\u{0016}' => '␖',
'\u{0017}' => '␗',
'\u{0018}' => '␘',
'\u{0019}' => '␙',
'\u{001A}' => '␚',
'\u{001B}' => '␛',
'\u{001C}' => '␜',
'\u{001D}' => '␝',
'\u{001E}' => '␞',
'\u{001F}' => '␟',
'\u{007F}' => '␡',
x if x.is_control() => '�',
x => x,
}
.encode_utf8(&mut [0; 4]) // UTF-8 encoding of a `char` is at most 4 bytes.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI Limit is only 4 if character code follows Unicode std limit 0x10ffff. But ISO std defines limit as 0x7ffffff. Which would require 6 bytes. So depends which limit the implementation ensures. But for simple pass-through encoding 6 bytes would be the safest.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust documents that 4 is enough. If 5-byte or 6-byte UTF-8 encodings should ever be revived, we can expect Rust to add new functions rather than change such an invariant in existing functions.

.as_bytes(),
)?;

Ok(())
}

/// Write a string to the output.
pub(crate) fn write_str(&mut self, s: &str) -> Result<usize> {
let mut result = 0;

for c in s.chars() {
self.write_char(c)?;
// Note that we use the encoding length of the given char, rather than
// how many bytes we actually wrote, because our users don't know about
// what's really being written.
result += c.len_utf8();
}

Ok(result)
}
}

impl<'writer, Writer> Write for SandboxedTTYWriter<'writer, Writer>
where
Writer: Write,
{
fn write(&mut self, buf: &[u8]) -> Result<usize> {
let mut input = buf;
let mut result = 0;

// Decode the string without heap-allocating it. See the example here
// for more details:
// https://doc.rust-lang.org/std/str/struct.Utf8Error.html#examples
loop {
match std::str::from_utf8(input) {
Ok(valid) => {
result += self.write_str(valid)?;
break;
}
Err(error) => {
let (valid, after_valid) = input.split_at(error.valid_up_to());
result += self.write_str(unsafe { std::str::from_utf8_unchecked(valid) })?;
self.write_char('�')?;

if let Some(invalid_sequence_length) = error.error_len() {
// An invalid sequence was encountered. Tell the application we've
// written those bytes (though actually, we replaced them with U+FFFD).
result += invalid_sequence_length;
// Set up `input` to resume writing after the end of the sequence.
input = &after_valid[invalid_sequence_length..];
} else {
// The end of the buffer was encountered unexpectedly. Tell the application
// we've written out the remainder of the buffer.
result += after_valid.len();
break;
}
}
}
}

return Ok(result);
}

fn flush(&mut self) -> Result<()> {
self.inner.flush()
}
}

#[cfg(test)]
mod tests {
use super::SandboxedTTYWriter;
use std::io::{Result, Write};

#[test]
fn basic() -> Result<()> {
let mut buffer = Vec::new();
let mut safe = SandboxedTTYWriter::new(&mut buffer);
safe.write_str("a\0b\u{0080}")?;
safe.write_char('\u{0007}')?;
safe.write(&[0x80])?;
safe.write(&[0xed, 0xa0, 0x80, 0xff, 0xfe])?;
assert_eq!(
buffer,
"a\u{2400}b\u{FFFD}\u{2407}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes()
);
Ok(())
}

#[test]
fn how_many_replacements() -> Result<()> {
// See https://hsivonen.fi/broken-utf-8/ for background.

let mut buffer = Vec::new();
let mut safe = SandboxedTTYWriter::new(&mut buffer);
safe.write(&[0x80, 0x80, 0x80, 0x80])?;
assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes());

let mut buffer = Vec::new();
let mut safe = SandboxedTTYWriter::new(&mut buffer);
safe.write(&[0xF0, 0x80, 0x80, 0x41])?;
assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}A".as_bytes());

let mut buffer = Vec::new();
let mut safe = SandboxedTTYWriter::new(&mut buffer);
safe.write(&[0xF0, 0x80, 0x80])?;
assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes());

let mut buffer = Vec::new();
let mut safe = SandboxedTTYWriter::new(&mut buffer);
safe.write(&[0xF4, 0x80, 0x80, 0xC0])?;
assert_eq!(buffer, "\u{FFFD}\u{FFFD}".as_bytes());

Ok(())
}
}